/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.68 - (hide annotations) (download)
Sat Mar 22 06:52:56 2008 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.67: +62 -9 lines
++ whatpm/t/ChangeLog	22 Mar 2008 06:52:53 -0000
	* content-model-2.dat: Test data on numbers of HTML4 deprecated
	attributes are added.

2008-03-22  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	22 Mar 2008 06:52:25 -0000
	* HTML.pm: body/@alink, body/@background, body/@bgcolor,
	body/@link, body/@text, body/@vlink, hn/@align, p/@align,
	br/@clear, pre/@width, ol/@compact, ul/@compact,
	dl/@compact, dl/@compact, menu/@compact, and div/@align implemented.

2008-03-22  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
## Feature status flags for HTML5 and Web Forms 2.0 features.  Every
## constant is a combination of the |FEATURE_*| flags defined by
## |Whatpm::ContentChecker|.

## Working-draft status flag only (no "allowed" flag).
## TODO: svg:*/@role
sub FEATURE_HTML5_ROLE () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## Last-call status flag plus the "allowed" flag.
sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}

## The following four constants currently share the same value:
## working-draft status flag plus the "allowed" flag.
sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## NOTE: Was part of HTML5, but was dropped.  (No "allowed" flag.)
sub FEATURE_HTML5_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## Web Forms 2.0: last-call status flag plus the "allowed" flag.
sub FEATURE_WF2 () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}

## Web Forms 2.0 deprecated feature.
## NOTE: MUST NOT be used.  (No "allowed" flag.)
sub FEATURE_WF2_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
44 wakaba 1.49
## NOTE: Metainformation Attributes Module by W3C XHTML2 WG.
## Last-call status flag only.
sub FEATURE_RDFA_LC () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
49 wakaba 1.58
50     ## NOTE: XHTML Role LCWD has almost no information on how the |role|
51     ## attribute can be used; the only requirement on that matter is:
52     ## "the attribute MUST be referenced using its namespace-qualified form" (and
53     ## this is a host language conformance requirement!).
54    
## Feature status flags for XHTML Basic 1.1, XHTML Modularization 1.0,
## XHTML 1.0, ISO-HTML and HTML4.  Each constant combines |FEATURE_*| flags
## defined by |Whatpm::ContentChecker|.

## NOTE: Only additions to M12N10_REC are marked.
sub FEATURE_XHTMLBASIC11_CR () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO |
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: M12N10 status is based on its abstract module definition.
## It contains a number of problems.  (However, again, it's a REC!)
sub FEATURE_M12N10_REC () {
  ## NOTE: Oh, XHTML m12n 1.0 passed the CR phase!  W3C Process suck!
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}
sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO |
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}
## NOTE: XHTML M12N 1.1 is a LC at the time of writing and has no
## addition from 1.0.

## NOTE: XHTML10 status is based on its transitional and frameset DTDs
## (second edition).  Only missing attributes from M12N10 abstract
## definition are added.
## NOTE(review): despite the |_REC| suffix this yields the CR status flag;
## confirm whether that is intentional.
sub FEATURE_XHTML10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: Diff from HTML4.
sub FEATURE_ISOHTML_PREPARATION () { ## Informative documentation
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
## 4.01).  Only missing attributes from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
94    
95     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
96     ## rather than presentational attributes (deprecated or not deprecated).
97 wakaba 1.48
## Feature status flags for features that only appear in older HTML
## specifications.  All of them carry the CR status flag.

## NOTE: Diff from HTML4.
sub FEATURE_HTML32_REC_OBSOLETE () {
  ## NOTE: Lowercase normative "should".
  Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD |
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

sub FEATURE_RFC2659 () { ## Experimental RFC
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
sub FEATURE_HTML2X_RFC () { ## Proposed Standard, obsolete
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: Diff from HTML 2.0.
sub FEATURE_RFC1942 () { ## Experimental RFC, obsolete
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: Diff from HTML 3.2.
sub FEATURE_HTML20_RFC () { ## Proposed Standard, obsolete
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
123 wakaba 1.58
124 wakaba 1.29 ## December 2007 HTML5 Classification
125    
## Elements categorized as metadata content: namespace URI => local name
## => true.
my $HTMLMetadataContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      title base link style script noscript
      event-source command datatemplate
    )),
    ## NOTE: A |meta| with no |name| element is not allowed as
    ## a metadata content other than |head| element.
    meta => 1,
    ## NOTE: Only when empty [WF2]
    form => 1,
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
140    
## Elements categorized as prose content: namespace URI => local name
## => true.
## NOTE(review): |input| and |datalist| are listed in the phrasing content
## table of this file but not here; confirm whether that is intended.
my $HTMLProseContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      section nav article blockquote aside
      h1 h2 h3 h4 h5 h6 header
      footer address p hr dialog pre
      ol ul dl figure map table
    )),
    details => 1, ## ISSUE: "Prose element" in spec.
    datagrid => 1, ## ISSUE: "Prose element" / "Interactive element" in spec.
    datatemplate => 1,
    div => 1, ## ISSUE: No category in spec.
    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    style => 1,

    (map { ($_ => 1) } qw(
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo script noscript event-source
      command font
      a
    )),
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ins => 1, del => 1,

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, prose.
    menu => 1,

    (map { ($_ => 1) } qw(
      img iframe embed object video audio canvas
    )),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
179    
## Elements categorized as sectioning content.
my $HTMLSectioningContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(section nav article aside)),
    ## NOTE: |body| is only allowed in |html| element.
    body => 1,
  },
};
187    
## Elements categorized as sectioning roots.
my $HTMLSectioningRoot = {
  $HTML_NS => {
    map { ($_ => 1) } qw(blockquote datagrid figure td)
  },
};
193    
## Elements categorized as heading content.
my $HTMLHeadingContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw(h1 h2 h3 h4 h5 h6 header)
  },
};
199    
## Elements categorized as phrasing content: namespace URI => local name
## => true.
my $HTMLPhrasingContent = {
  ## NOTE: All phrasing content is also prose content.
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo script noscript event-source
      command font
      a
    )),
    datagrid => 1, ## ISSUE: "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    ins => 1, del => 1,

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, prose.
    menu => 1,

    (map { ($_ => 1) } qw(
      img iframe embed object video audio canvas
    )),

    ## NOTE: WF2
    input => 1, ## NOTE: type=hidden
    datalist => 1, ## NOTE: block | where |select| allowed
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## NOTE: And non-inter-element-whitespace text nodes.
};
233    
234 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
235 wakaba 1.29
## Elements categorized as interactive content.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    a => 1,
    ## ISSUE: Categorized as "Interactive element"
    datagrid => 1,
  },
};
242    
243 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
244     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
245    
246     ## -- Common attribute syntax checkers
247    
248 wakaba 1.1 our $AttrChecker;
249    
## Builds a checker for an enumerated attribute.
##   $states - hash reference {lowercase keyword => state}.  A positive
##             state means conforming; any other true state (e.g. -1) means
##             recognized but non-conforming.
## The returned checker takes ($self, $attr), lowercases the attribute value
## and reports |enumerated:non-conforming| or |enumerated:invalid| through
## |$self->{onerror}|.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    my $keyword = lc $attr->value; ## TODO: ASCII case insensitivity?
    my $state = $states->{$keyword};
    my $error_type = !$state    ? 'enumerated:invalid'
                   : $state > 0 ? undef
                   :              'enumerated:non-conforming';
    $self->{onerror}->(node => $attr, type => $error_type)
        if defined $error_type;
  };
}; # $GetHTMLEnumeratedAttrChecker
264    
## Builds a checker for a boolean attribute.
##   $local_name - the attribute's local name.
## The returned checker takes ($self, $attr) and reports |boolean:invalid|
## unless the value is the empty string or exactly |$local_name|
## (case-sensitive comparison).
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    return if $value eq '' or $value eq $local_name;
    $self->{onerror}->(node => $attr, type => 'boolean:invalid');
  };
}; # $GetHTMLBooleanAttrChecker
275    
## Unordered set of unique space-separated tokens.
## Takes ($self, $attr); reports |duplicate token:TOKEN| through
## |$self->{onerror}| for the second and every later occurrence of a token.
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($self, $attr) = @_;
  my %seen;
  for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
    next unless length $token; # runs of separators yield empty fields
    next unless $seen{$token}++; # first occurrence is fine
    $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
  }
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
288 wakaba 1.8
## |rel| attribute (unordered set of space separated tokens,
## whose allowed values are defined by the section on link types)
##
## Arguments: ($a_or_area, $todo, $self, $attr, $item, $element_state)
##   $a_or_area     - index into each link type's |effect| array; a true
##                    value also forces the |href| to be typed as hyperlink.
##   $todo          - hash; |has_hyperlink_link_type| is set when a
##                    hyperlink link type is found.
##   $element_state - hash; |uri_info|->|href|->|type| is annotated.
## Link type definitions are read from |$LinkType|, loaded via
## |Whatpm::_LinkTypeList|.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Collect the tokens.  A repeated token is an error, except for |up|.
  my %word;
  for my $token (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
    }
  }
  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  my ($is_hyperlink, $is_resource);
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $token (keys %word) {
    my $def = $LinkType->{$token};
    unless (defined $def) {
      $self->{onerror}->(node => $attr, level => 'unsupported',
                         type => 'link type:'.$token);
      next;
    }

    my $effect = $def->{effect}->[$a_or_area];
    my $status = $def->{status};
    if ($status eq 'accepted' or $status eq 'proposal') {
      $self->{onerror}->(node => $attr, level => 's',
                         type => 'link type:proposed:'.$token)
          if $status eq 'proposal';
      $self->{onerror}->(node => $attr,
                         type => 'link type:bad context:'.$token)
          unless defined $effect;
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming:'.$token);
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$token}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate:'.$token);
      } else {
        $self->{has_link_type}->{$token} = 1;
      }
    }

    ## |alternate| is decided after the loop, since its effect depends on
    ## whether |stylesheet| is also specified.
    if (defined $effect and $token ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback link,
  ## which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  my $href_type = ($is_hyperlink or $a_or_area) ? 'hyperlink' : undef;
  $element_state->{uri_info}->{href}->{type}->{$href_type} = 1
      if defined $href_type;
  $element_state->{uri_info}->{href}->{type}->{resource} = 1
      if $is_resource and not $a_or_area;
}; # $HTMLLinkTypesAttrChecker
387 wakaba 1.20
388     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
389 wakaba 1.1
## URI (or IRI)
## Takes ($self, $attr, $item, $element_state).  Validates the value with
## |Whatpm::URIChecker->check_iri_reference|, forwarding each problem to
## |$self->{onerror}| as |URI::TYPE[:POSITION]|, and records the attribute
## in |$element_state->{uri_info}| and |$self->{return}->{uri}|.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute
  ## reference with optional fragment identifier.)
  my $value = $attr->value;
  my $report = sub {
    my %opt = @_;
    my $position = defined $opt{position} ? ':'.$opt{position} : '';
    $self->{onerror}->(node => $attr, level => $opt{level},
                       type => 'URI::'.$opt{type}.$position);
  };
  Whatpm::URIChecker->check_iri_reference ($value, $report);
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  my $same_uri = ($self->{return}->{uri}->{$value} ||= []);
  push @$same_uri, $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
409    
## A space separated list of one or more URIs (or IRIs)
## Takes ($self, $attr).  Every item is validated with
## |Whatpm::URIChecker->check_iri_reference|; problems are reported as
## |URIs::TYPE:INDEX[:POSITION]| where INDEX is the zero-based item index.
## Each item is also recorded in |$self->{return}->{uri}|, typed according
## to the attribute name (|ping|, |profile| or |archive|).
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  my %type_of = (ping => 'action',
                 profile => 'namespace',
                 archive => 'resource');
  my $type = $type_of{$attr->name};

  my $index = 0;
  for my $uri (split /[\x09-\x0D\x20]+/, $attr->value) {
    Whatpm::URIChecker->check_iri_reference ($uri, sub {
      my %opt = @_;
      my $position = defined $opt{position} ? ':'.$opt{position} : '';
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => 'URIs:'.':'.
                             $opt{type}.':'.$index.$position);
    });

    ## TODO: absolute
    my $same_uri = ($self->{return}->{uri}->{$uri} ||= []);
    push @$same_uri, {node => $attr, type => {$type => 1}};

    $index++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative
  ## reference, i.e. same as base URI.)
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
441    
## Checks a date-and-time-with-timezone attribute value
## (|YYYY-MM-DD|, separator, |hh:mm[:ss][.fraction]|, then |Z| or |+hh:mm| /
## |-hh:mm|).
## Takes ($self, $attr).  Reports |datetime:syntax error| when the overall
## form does not match; otherwise reports out-of-range components
## (|bad day|, |bad month|, |bad hour|, |bad minute|, |bad second|,
## |bad timezone hour|, |bad timezone minute|) through |$self->{onerror}|.
my $HTMLDatetimeAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  my $report = sub {
    $self->{onerror}->(node => $attr, type => 'datetime:'.$_[0]);
  };

  ## ISSUE: "space", not "space character" (in parsing algorithm, "space character")
  unless ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
    $report->('syntax error');
    return;
  }
  ## $s is undefined when seconds are omitted; $zh and $zm are undefined
  ## when the timezone is |Z|.
  my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
      = ($1, $2, $3, $4, $5, $6, $7, $8, $9);

  if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
    my $is_leap_year = ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
    my @days_in_month = (0, 31, ($is_leap_year ? 29 : 28),
                         31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
    $report->('bad day') if $d < 1 or $d > $days_in_month[$M];
  } else {
    $report->('bad month');
  }
  $report->('bad hour') if $h > 23;
  $report->('bad minute') if $m > 59;
  $report->('bad second') if defined $s and $s > 59;
  ## Guard against the |Z| form, where no zone offset was captured.
  $report->('bad timezone hour') if defined $zh and $zh > 23;
  $report->('bad timezone minute') if defined $zm and $zm > 59;
  ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
}; # $HTMLDatetimeAttrChecker
472    
## Checks that the attribute value is an integer: an optional |-| followed
## by one or more ASCII digits.  Takes ($self, $attr); reports
## |integer:syntax error| otherwise.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A-?[0-9]+\z/;
  $self->{onerror}->(node => $attr, type => 'integer:syntax error');
}; # $HTMLIntegerAttrChecker
480    
## Builds a checker for a non-negative integer attribute.
##   $range_check - code reference called with the numeric value; a false
##                  return means the value is out of range.
## The returned checker takes ($self, $attr) and reports
## |nninteger:syntax error| if the value is not a run of ASCII digits, or
## |nninteger:out of range| if |$range_check| rejects it.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    my $problem = $value !~ /\A[0-9]+\z/       ? 'syntax error'
                : $range_check->($value + 0)   ? undef
                :                                'out of range';
    $self->{onerror}->(node => $attr, type => 'nninteger:'.$problem)
        if defined $problem;
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
496    
## Builds a checker for a floating point number attribute.
##   $range_check - code reference called with the numeric value; a false
##                  return means the value is out of range.
## The returned checker takes ($self, $attr).  A value is syntactically
## accepted when it is an optional |-| followed by ASCII digits containing
## at most one |.| (before, between or after the digits) and at least one
## digit.  Otherwise |float:syntax error| is reported; if |$range_check|
## rejects the number, |float:out of range| is reported.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## A bare character class would also accept several dots ("1.2.3"),
    ## which Perl then silently truncates in numeric context.
    if ($value =~ /\A-?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)\z/) {
      unless ($range_check->($value + 0)) {
        $self->{onerror}->(node => $attr, type => 'float:out of range');
      }
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error');
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
512    
## "A valid MIME type, optionally with parameters. [RFC 2046]"
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
##
## Takes ($self, $attr).  Parses the value as |type/subtype| followed by
## zero or more |;name=value| parameters and passes the flat list
## (type, subtype, name1, value1, ...) to |Whatpm::IMTChecker->check_imt|,
## whose reports are forwarded to |$self->{onerror}| as |IMT:TYPE|.
## A value that does not parse is reported as |IMT:syntax error|.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  unless ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
    return;
  }

  my @type = ($1, $2);
  my $param = $3;
  while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
    ## Exactly one of $plain (token form) and $quoted (quoted-string form)
    ## is defined.
    my ($name, $plain, $quoted) = ($1, $2, $3);
    if (defined $plain) {
      push @type, $name => $plain;
    } else {
      ## Drop the surrounding quotation marks, then resolve quoted-pairs.
      my $v = substr ($quoted, 1, length ($quoted) - 2);
      $v =~ s/\\(.)/$1/gs;
      push @type, $name => $v;
    }
  }
  require Whatpm::IMTChecker;
  Whatpm::IMTChecker->check_imt (sub {
    my %opt = @_;
    $self->{onerror}->(node => $attr, level => $opt{level},
                       type => 'IMT:'.$opt{type});
  }, @type);
}; # $HTMLIMTAttrChecker
549    
## Checks a language tag attribute with
## |Whatpm::LangTag->check_rfc3066_language_tag|.  Takes ($self, $attr).
## Each problem is forwarded to |$self->{onerror}| with type
## |LangTag:TYPE[:SUBTAG]| together with the reported |value| and |level|.
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  require Whatpm::LangTag;
  my $forward = sub {
    my %opt = @_;
    my @type = ('LangTag', $opt{type});
    push @type, $opt{subtag} if defined $opt{subtag};
    $self->{onerror}->(node => $attr, type => (join ':', @type),
                       value => $opt{value}, level => $opt{level});
  };
  Whatpm::LangTag->check_rfc3066_language_tag ($attr->value, $forward);
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
567    
## "A valid media query [MQ]"
## Media queries are not validated; every use is reported at the
## |unsupported| level.  Takes ($self, $attr).
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my %report = (node => $attr, level => 'unsupported',
                type => 'media query');
  $self->{onerror}->(%report);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
575    
## Event handler content attributes are not validated; every use is
## reported at the |unsupported| level.  Takes ($self, $attr).
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my %report = (node => $attr, level => 'unsupported',
                type => 'event handler');
  $self->{onerror}->(%report);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
586    
## |usemap| attribute: MUST be a valid hashed ID reference to a |map|
## element.  Takes ($self, $attr).  A value starting with |#| is queued in
## |$self->{usemap}| as [ID-without-hash, $attr]; anything else is reported
## as |#idref:syntax error|.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value =~ /\A#(.*)\z/s) {
    ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
    my $id = $1;
    push @{$self->{usemap}}, [$id => $attr];
  } else {
    $self->{onerror}->(node => $attr, type => '#idref:syntax error');
  }
  ## NOTE: Space characters in hashed ID references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported
}; # $HTMLUsemapAttrChecker
600    
## |target| attribute: a browsing context name or keyword.
## Takes ($self, $attr).  A value starting with |_| must be one of |_self|,
## |_parent| or |_top| (compared after lowercasing); otherwise
## |reserved browsing context name| is reported.  Any other value is
## accepted.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;

  ## NOTE: An empty string is a valid browsing context name (same as _self).
  return unless $value =~ /^_/;

  my %keyword = map { ($_ => 1) } qw(_self _parent _top);
  return if $keyword{lc $value}; ## ISSUE: ASCII case-insensitive?
  $self->{onerror}->(node => $attr,
                     type => 'reserved browsing context name');
}; # $HTMLTargetAttrChecker
616    
## Checks an attribute whose value is a group of selectors by parsing it
## with |Whatpm::CSS::SelectorsParser|.  Takes ($self, $attr).  Parser
## errors are forwarded to |$self->{onerror}| with the type prefixed by
## |selectors:| and |node| set to the attribute.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  my $value = $attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  ## Pseudo-classes and pseudo-elements the parser should recognize.
  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  my @pseudo_elements = qw/
    after before first-letter first-line
  /;
  for my $name (@pseudo_classes) {
    $parser->{pseudo_class}->{$name} = 1;
  }
  for my $name (@pseudo_elements) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{must_level} = $self->{must_level};
  $parser->{onerror} = sub {
    my %error = @_;
    $error{type} = 'selectors:'.$error{type};
    $self->{onerror}->(%error, node => $attr);
  };
  $parser->parse_string ($value);
}; # $HTMLSelectorsAttrChecker
646    
## |accesskey| attribute: a single character.
## Takes ($self, $attr).  Reports |char:syntax error| at
## |$self->{fact_level}| unless the value is exactly one character long.
my $HTMLAccesskeyAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: "character" or |%Character;| in HTML4.

  unless (length ($attr->value) == 1) {
    $self->{onerror}->(node => $attr, type => 'char:syntax error',
                       level => $self->{fact_level}); ## TODO: type
  }

  ## NOTE: "Note. Authors should consider the input method of the expected
  ## reader when specifying an accesskey." [HTML4]  This is hard to implement,
  ## since it depends on keyboard and so on.
  ## NOTE: "We recommend that authors include the access key in label text
  ## or wherever the access key is to apply." [HTML4] (informative)
}; # $HTMLAccesskeyAttrChecker
664    
## HTML4 color attribute value: |#| followed by hexadecimal digits, or one
## of the sixteen HTML4 color names (matched case-insensitively).
## Takes ($self, $attr).  Reports |color:syntax error| at
## |$self->{fact_level}| for any other value.
my $HTMLColorAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: HTML4 "color" or |%Color;|

  my $value = $attr->value;
  my $is_color = $value =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i;
  unless ($is_color) {
    $self->{onerror}->(node => $attr, type => 'color:syntax error', ## TODO: type
                       level => $self->{fact_level});
  }

  ## TODO: HTML4 has some guideline on usage of color.
}; # $HTMLColorAttrChecker
679    
## Checkers for the HTML global attributes: attribute local name => code
## reference invoked with ($self, $attr).
my $HTMLAttrChecker = {
  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]

  ## |id|: must be non-empty, unique and free of space characters.  Every
  ## attribute node seen for an ID is collected in |$self->{id}|.
  id => sub {
    ## NOTE: |map| has its own variant of |id=""| checker
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless (length $value > 0) {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value');
      return;
    }

    my $same_id = $self->{id}->{$value};
    if ($same_id) {
      $self->{onerror}->(node => $attr, type => 'duplicate ID');
      push @$same_id, $attr;
    } else {
      $self->{id}->{$value} = [$attr];
    }
    $self->{onerror}->(node => $attr, type => 'space in ID')
        if $value =~ /[\x09-\x0D\x20]/;
  },

  title => sub {}, ## NOTE: No conformance criteria

  ## |lang|: empty, or a language tag checked by |Whatpm::LangTag|.  The
  ## attribute is additionally reported when the owner document is not an
  ## HTML document.
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value ne '') {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        my %opt = @_;
        my $type = 'LangTag:'.$opt{type};
        $type .= ':' . $opt{subtag} if defined $opt{subtag};
        $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
                           level => $opt{level});
      });
    }
    ## ISSUE: RFC 4646 (3066bis)?
    $self->{onerror}->(node => $attr, type => 'in XML:lang')
        unless $attr->owner_document->manakai_is_html;

    ## TODO: test data
  },

  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),

  ## |class|: set of unique space-separated tokens.  The first occurrence
  ## of each token is recorded in |$self->{return}->{class}|; repeats are
  ## reported.
  class => sub {
    my ($self, $attr) = @_;
    my %seen;
    for my $token (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
      } else {
        $seen{$token} = 1;
        push @{$self->{return}->{class}->{$token}||=[]}, $attr;
      }
    }
  },

  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),

  ## |contextmenu|: queued in |$self->{contextmenu}| as [value, $attr].
  contextmenu => sub {
    my ($self, $attr) = @_;
    push @{$self->{contextmenu}}, [$attr->value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },

  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ## TODO: repeat, repeat-start, repeat-min, repeat-max, repeat-template ## TODO: global
  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
  tabindex => $HTMLIntegerAttrChecker
  ## TODO: ref, template, registrationmark
};
755    
## Feature status of the HTML global attributes: attribute local name =>
## |FEATURE_*| value.  Grouped by status.
my %HTMLAttrStatus = (
  (map { ($_ => FEATURE_HTML5_DEFAULT) } qw(
    class contenteditable dir id lang tabindex title
  )),
  (map { ($_ => FEATURE_HTML5_WD) } qw(
    contextmenu irrelevant
  )),
  draggable => FEATURE_HTML5_LC,
  (map { ($_ => FEATURE_HTML5_AT_RISK) } qw(
    ref registrationmark template
  )),
  (map { ($_ => FEATURE_WF2) } qw(
    repeat repeat-max repeat-min repeat-start repeat-template
  )),
  role => FEATURE_HTML5_ROLE,
);
777    
## Feature-status flags for a shared set of attributes (judging by the
## name, presumably the XHTML Modularization "common" collection plus
## the RDFa attributes -- confirm against the callers).  Element
## definitions splice this table in after |%HTMLAttrStatus|, so entries
## here take precedence over the generic ones.
my %HTMLM12NCommonAttrStatus = (
  ## Attributes that only have an RDFa status.
  (map { ($_ => FEATURE_RDFA_LC) } qw(
    about content datatype instanceof property rel resource rev
  )),
  ## Attributes known to both HTML5 and M12N 1.0.
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw(
    class dir id title
    onclick ondblclick
    onmousedown onmouseup onmouseover onmousemove onmouseout
    onkeypress onkeydown onkeyup
  )),
  style => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
      FEATURE_M12N10_REC,
);
804    
## Register every event handler content attribute as a global HTML
## attribute: each one shares the same checker and the same status.
foreach my $event_attr_name (qw(
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onsubmit onunload
)) {
  $HTMLAttrChecker->{$event_attr_name} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$event_attr_name} = FEATURE_HTML5_DEFAULT;
}
816    
## Builds a |check_attrs| callback for an HTML element.
##
## Arguments:
##   1. hash reference mapping attribute local names to element-specific
##      attribute checkers (code references);
##   2. hash reference mapping attribute local names to feature-status
##      flags for this element.
##
## Returns a code reference taking |($self, $item, $element_state)| that
## walks the attributes of |$item->{node}| and, for each of them:
##   - picks a checker: for null-namespace attributes the element-specific
##     one, then the global |$HTMLAttrChecker| one; otherwise (or as a
##     fallback) the namespace-wide |$AttrChecker| entry or its |''|
##     default;
##   - runs the checker if there is one; stays silent for a null-namespace
##     attribute that has no status entry either; reports an
##     |unsupported| "attribute" error in every other case;
##   - for null-namespace attributes, passes the status entry on to
##     |$self->_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;
  return sub {
    my ($self, $item, $element_state) = @_;
    foreach my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' if not defined $ns;
      my $ln = $attr->manakai_local_name;
      my $in_null_ns = ($ns eq '');

      my $checker;
      $checker = $element_specific_checker->{$ln} || $HTMLAttrChecker->{$ln}
          if $in_null_ns;
      $checker ||= $AttrChecker->{$ns}->{$ln} || $AttrChecker->{$ns}->{''};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif (not $in_null_ns or $element_specific_status->{$ln}) {
        ## Either a namespaced attribute nobody knows how to check, or a
        ## null-namespace attribute with a known status but no checker.
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $element_specific_status->{$ln})
          if $in_null_ns;
      ## TODO: global attribute
    }
  };
}; # $GetHTMLAttrsChecker
849    
## Base checker for HTML elements: everything from the generic "any"
## checker, with attribute checking replaced by the HTML global
## attribute rules (no element-specific attributes).
my %HTMLChecker = %Whatpm::ContentChecker::AnyChecker;
$HTMLChecker{check_attrs} = $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus);
854    
## Checker for elements whose content model is empty.  Child elements
## are errors unless they are listed in |$self->{plus_elements}|;
## significant text is always an error.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## Explicitly excluded elements are reported as such; explicitly
    ## included ones are tolerated; anything else violates the empty
    ## content model.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:empty';
    }
    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{must_level})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{must_level})
        if $has_significant;
  },
);
881    
## Checker for elements that may contain text only.  Child elements
## are errors unless they are listed in |$self->{plus_elements}|; text
## handling is inherited from |%HTMLChecker|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## Extra |onerror| arguments for this child; empty means "no error".
    ## Only the "minus" error carries an explicit level.
    my @error
        = $self->{minus_elements}->{$child_nsuri}->{$child_ln}
            ? (type => 'element not allowed:minus',
               level => $self->{must_level})
        : $self->{plus_elements}->{$child_nsuri}->{$child_ln}
            ? ()
        : (type => 'element not allowed');
    $self->{onerror}->(node => $child_el, @error) if @error;
  },
);
898    
## TODO: Rename as "FlowContent" (HTML5 revision 1261)
##
## Checker for elements whose content model is prose content.
##
## |$element_state->{has_non_style}| records that something other than
## a |style| element has already been seen; a |style| child is accepted
## only before that point and only if it has a |scoped| attribute.
## At the end of the element, a missing |has_significant| state is
## reported at "should" level unless the element is transparent.
my %HTMLProseContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      my $style_is_ok = (not $element_state->{has_non_style} and
                         $child_el->has_attribute_ns (undef, 'scoped'));
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:prose style',
                         level => $self->{must_level})
          unless $style_is_ok;
    } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
      ## A transparent child does not by itself end the run of leading
      ## |style| elements.
      if (not $child_is_transparent) {
        $element_state->{has_non_style} = 1;
      }
    } else {
      $element_state->{has_non_style} = 1;
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:prose',
                         level => $self->{must_level});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate to the parent's state.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
);
946    
## Checker for elements whose content model is phrasing content.
## A child element is accepted if it is listed in
## |$self->{plus_elements}| or in |$HTMLPhrasingContent|; the
## end-of-element check is shared with the prose content checker.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
                  $HTMLPhrasingContent->{$child_nsuri}->{$child_ln})) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:phrasing',
                         level => $self->{must_level});
    }
  },
  check_end => $HTMLProseContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between prose and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
971    
## Transparent elements are currently checked exactly like prose
## content (this is a copy, not an alias).
## ISSUE: Should the significant content rule be applied to a
## transparent element with a parent?
my %HTMLTransparentChecker = (%HTMLProseContentChecker);

## Element definition tables; declared as package variables in this
## file.
our ($Element, $ElementDefault);

## Fallback definition for HTML-namespace elements that have no entry
## of their own.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
982    
## The |html| element: the root element, whose children must be one
## |head| followed by one |body|.  |$element_state->{phase}| tracks how
## far through that sequence the checker is ('before head',
## 'after head', 'after body').
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    xmlns => sub {
      my ($self, $attr) = @_;
      my $ns_uri = $attr->value;
      $self->{onerror}->(node => $attr, type => 'invalid attribute value')
          if $ns_uri ne $HTML_NS;
      ## TODO: Test
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns')
          unless $attr->owner_document->manakai_is_html;

      ## Record the namespace URI among the URIs used by the document.
      ## TODO: Should be resolved?
      my $uses = $self->{return}->{uri}->{$ns_uri} ||= [];
      push @$uses, {node => $attr, type => {namespace => 1}};
    },
    version => sub {
      ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
      ## Though DTDs of various versions of HTML define the attribute
      ## as |#FIXED|, this conformance checker does no check for
      ## the attribute value, since what kind of check should be done
      ## is unknown.
    },
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_DEFAULT) } qw(manifest xmlns)),
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC) } qw(id lang)),
    class => FEATURE_HTML5_DEFAULT | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    sdaform => FEATURE_HTML20_RFC,
    version => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';
    $element_state->{uri_info}->{manifest}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## True if the child is the HTML element with the given local name.
    my $child_is_html = sub {
      return ($child_nsuri eq $HTML_NS and $child_ln eq $_[0]);
    };
    my $not_allowed = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; does not advance the phase.
    } elsif ($element_state->{phase} eq 'before head') {
      if ($child_is_html->('head')) {
        $element_state->{phase} = 'after head';
      } elsif ($child_is_html->('body')) {
        ## |body| with no preceding |head|.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing:head');
        $element_state->{phase} = 'after body';
      } else {
        $not_allowed->();
      }
    } elsif ($element_state->{phase} eq 'after head') {
      if ($child_is_html->('body')) {
        $element_state->{phase} = 'after body';
      } else {
        $not_allowed->();
      }
    } elsif ($element_state->{phase} eq 'after body') {
      $not_allowed->();
    } else {
      die "check_child_element: Bad |html| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Children still missing when the element ends, by phase.
    my %missing_in_phase = (
      'after body' => [],
      'before head' => [qw(head body)],
      'after head' => [qw(body)],
    );
    my $missing = $missing_in_phase{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";
    foreach my $child_name (@$missing) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:' . $child_name);
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1086 wakaba 1.25
## The |head| element: metadata content with exactly one |title|.
##
## While the children of |head| are being processed,
## |$self->{flag}->{in_head}| is set to 1; the value the flag had
## before is kept in |$element_state->{in_head_original}| and put back
## in |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_DEFAULT | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is allowed.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{must_level});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## A |style| in |head| must not be scoped.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{must_level});
    }

    ## Save the previous flag value only once.  Saving it again for
    ## every child would, from the second child on, store the value 1
    ## set just below, and |check_end| would then leave the flag set
    ## after the |head| element has ended.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:title');
    }

    ## Restore the flag only if this element changed it; a |head|
    ## without child elements never touched it.
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1156    
## The |title| element: text-only content, global attributes only.
## The status table extends the global one with the status of the
## attributes in earlier HTML versions.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC) } qw(id lang)),
    class => FEATURE_HTML5_DEFAULT | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};
1169 wakaba 1.1
## The |base| element: empty content; at most one per document (tracked
## in |$self->{has_base}|); needs |href| and/or |target|; and must come
## before anything its attributes would affect.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    ## A second |base| element is an error.
    $self->{onerror}->(node => $base_el,
                       type => 'element not allowed:base')
        if $self->{has_base};
    $self->{has_base} ||= 1;

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $self->{onerror}->(node => $base_el,
                         type => 'basehref after URI attribute');
    }
    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $self->{onerror}->(node => $base_el,
                         type => 'basetarget after hyperlink');
    }

    $self->{onerror}->(node => $base_el,
                       type => 'attribute missing:href|target')
        unless $has_href or $has_target;

    $element_state->{uri_info}->{href}->{type}->{base} = 1;

    ## Check the individual attributes with the generic HTML attribute
    ## checker.
    my %attr_checker = (
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    );
    my %attr_status = (
      %HTMLAttrStatus,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    );
    return $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status)
        ->($self, $item, $element_state);
  },
};
1230    
## The |link| element: empty content; both |href| and |rel| are
## required.  If the |rel| checker marked the item as a hyperlink
## (|$item->{has_hyperlink_link_type}|), the document-wide
## |$self->{has_hyperlink_element}| flag is raised.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my %attr_checker = (
      href => $HTMLURIAttrChecker,
      ## The link types checker additionally needs the item; the first
      ## argument is passed through unchanged as 0.
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    );
    my %attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      ## Element-specific entries; these override the two tables above.
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw(
        href hreflang media rel type
      )),
      (map { ($_ => FEATURE_M12N10_REC) } qw(charset rev target)),
      (map { ($_ => FEATURE_HTML20_RFC) } qw(methods sdapref urn)),
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    );
    $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status)
        ->($self, $item, $element_state);

    my $link_el = $item->{node};
    if ($link_el->has_attribute_ns (undef, 'href')) {
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    } else {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing:href');
    }
    $self->{onerror}->(node => $link_el,
                       type => 'attribute missing:rel')
        unless $link_el->has_attribute_ns (undef, 'rel');
  },
};
1272    
## The |meta| element: empty content.
##
## |check_attrs| works in three steps:
##   1. every attribute is checked individually; |content|, |name|,
##      |http-equiv| and |charset| are only remembered here (their
##      checker is the placeholder value 1) and validated later;
##   2. the combination of those four attributes is validated;
##   3. character encoding declarations (|charset|, or |http-equiv|
##      with the |content-type| keyword) are checked for position and
##      for the encoding name they give.
$Element->{$HTML_NS}->{meta} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $name_attr;
    my $http_equiv_attr;
    my $charset_attr;
    my $content_attr;

    ## Status of the attributes allowed on |meta|.  The table does not
    ## depend on the attribute being checked, so it is built once per
    ## element rather than once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      charset => FEATURE_HTML5_DEFAULT,
      content => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      'http-equiv' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scheme => FEATURE_M12N10_REC,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'content') {
          $content_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'name') {
          $name_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'http-equiv') {
          $http_equiv_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'charset') {
          $charset_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'scheme') {
          $checker = sub {};
          ## NOTE: According to HTML4, values for the |scheme| attribute
          ## depend on |name| attribute and |profile| of |head|.  Otherwise
          ## it is "cdata".  The only profile with any scheme value defined
          ## is <http://dublincore.org/documents/dcq-html/> (and those
          ## that reference that profile; see
          ## <http://suika.fam.cx/gate/2005/sw/scheme#anchor-55> for more
          ## information).
          ## TODO: Should we implement the checking against the profile above?
          ## (But we don't want to implement its namespace bits.  They are
          ## poorly designed and obsolete in favor of HTML5's new ecosystem.)
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{''};
        }
      } else {
        $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{''};
      }

      my $status = $attr_status->{$attr_ln};

      if ($checker) {
        ## The placeholder value 1 is not callable; those attributes are
        ## validated after the loop.
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status);
      }
    }

    ## Step 2: exactly one of |name|, |http-equiv| and |charset| is
    ## expected; |content| is required with the first two and forbidden
    ## otherwise.
    if (defined $name_attr) {
      if (defined $http_equiv_attr) {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'attribute not allowed');
      } elsif (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $http_equiv_attr) {
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $charset_attr) {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
      }
    } else {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv');
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv|charset');
      }
    }

    ## Checks that this element is allowed to be a character encoding
    ## declaration: it has to be the first child element of the
    ## document's |head|, and the document has to be an HTML document.
    my $check_charset_decl = sub {
      my $parent = $item->{node}->manakai_parent_element;
      if ($parent and $parent eq $parent->owner_document->manakai_head) {
        for my $el (@{$parent->child_nodes}) {
          next unless $el->node_type == 1; # ELEMENT_NODE
          unless ($el eq $item->{node}) {
            ## NOTE: Not the first child element.
            $self->{onerror}->(node => $item->{node},
                               type => 'element not allowed:meta charset',
                               level => $self->{must_level});
          }
          last;
          ## NOTE: Entity references are not supported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'element not allowed:meta charset',
                           level => $self->{must_level});
      }

      unless ($item->{node}->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $item->{node},
                           type => 'in XML:charset',
                           level => $self->{must_level});
      }
    }; # $check_charset_decl

    ## Checks an encoding name taken from the attribute |$attr|: it is
    ## compared with the input encoding of the document, checked
    ## syntactically, and checked against the charset registry data.
    my $check_charset = sub {
      my ($attr, $charset_value) = @_;
      ## NOTE: Though the case-sensitivity of |charset| attribute value
      ## is not explicitly spelled in the HTML5 spec, the Character Set
      ## registry of IANA, which is referenced from HTML5 spec, says that
      ## charset name is case-insensitive.
      $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

      require Message::Charset::Info;
      my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
      my $ic = $item->{node}->owner_document->input_encoding;
      if (defined $ic) {
        ## TODO: Test for this case
        my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
        if ($charset ne $ic_charset) {
          $self->{onerror}->(node => $attr,
                             type => 'mismatched charset name:'.$ic.
                                 ':'.$charset_value, ## TODO: This should be a |value| value.
                             level => $self->{must_level});
        }
      } else {
        ## NOTE: MUST, but not checkable, since the document is not originally
        ## in serialized form (or the parser does not preserve the input
        ## encoding information).
        $self->{onerror}->(node => $attr,
                           type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
                           level => 'unsupported');
      }

      ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
      ## Syntactically valid and registered?  What about x-charset names?
      unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
                  ($charset_value)) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:syntax error:'.$charset_value, ## TODO
                           level => $self->{must_level});
      }

      if ($charset) {
        ## ISSUE: What is "the preferred name for that encoding" (for a charset
        ## with no "preferred MIME name" label)?
        my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
        if (($charset_status &
             Message::Charset::Info::PREFERRED_CHARSET_NAME ())
                != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
          $self->{onerror}->(node => $attr,
                             type => 'charset:not preferred:'.
                                 $charset_value, ## TODO
                             level => $self->{must_level});
        }
        if (($charset_status &
             Message::Charset::Info::REGISTERED_CHARSET_NAME ())
                != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
          if ($charset_value =~ /^x-/) {
            $self->{onerror}->(node => $attr,
                               type => 'charset:private:'.$charset_value, ## TODO
                               level => $self->{good_level});
          } else {
            $self->{onerror}->(node => $attr,
                               type => 'charset:not registered:'.
                                   $charset_value, ## TODO
                               level => $self->{good_level});
          }
        }
      } elsif ($charset_value =~ /^x-/) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:private:'.$charset_value, ## TODO
                           level => $self->{good_level});
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'charset:not registered:'.$charset_value, ## TODO
                           level => $self->{good_level});
      }

      if ($attr->get_user_data ('manakai_has_reference')) {
        $self->{onerror}->(node => $attr,
                           type => 'character reference in charset',
                           level => $self->{must_level});
      }
    }; # $check_charset

    ## TODO: metadata conformance

    ## TODO: pragma conformance
    if (defined $http_equiv_attr) { ## An enumerated attribute
      my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
      if ({
        'refresh' => 1,
        'default-style' => 1,
      }->{$keyword}) {
        #

        ## TODO: More than one occurrence is a MUST-level error (revision 1180).
      } elsif ($keyword eq 'content-type') {
        ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.

        $check_charset_decl->();
        if ($content_attr) {
          my $content = $content_attr->value;
          if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
                            [\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
                            =(.+)\z!sx) {
            $check_charset->($content_attr, $1);
          } else {
            $self->{onerror}->(node => $content_attr,
                               type => 'meta content-type syntax error',
                               level => $self->{must_level});
          }
        }
      } else {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'enumerated:invalid');
      }
    }

    if (defined $charset_attr) {
      $check_charset_decl->();
      $check_charset->($charset_attr, $charset_attr->value);
    }
  },
};
1540    
## |style|: embedded style sheet.
##
## The text children are accumulated in |$element_state->{text}|.  At the
## end of the element the accumulated text is handed to the |onsubdoc|
## callback as a |text/css| subdocument when the |type| attribute is
## absent or matches |text/css|; any other |type| is reported with the
## 'unsupported' level and child elements are then tolerated.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_DEFAULT,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Start with an empty but defined buffer, so that a |style| element
    ## without any text child still passes a defined string to
    ## |onsubdoc| in |check_end| (it used to pass |undef|).
    $element_state->{text} = '';

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $type = $item->{node}->get_attribute_ns (undef, 'type');
    if (not defined $type or
        $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      $element_state->{allow_element} = 0;
      $element_state->{style_type} = 'text/css';
    } else {
      $element_state->{allow_element} = 1; # unknown
      $element_state->{style_type} = $type; ## TODO: $type normalization
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{allow_element}) {
      ## Unknown styling language: child elements cannot be judged.
      #
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Collect the style sheet source for |check_end|.
    $element_state->{text} .= $child_node->text_content;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{style_type} eq 'text/css') {
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => 'text/css', is_char_string => 1});
    } else {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'style:'.$element_state->{style_type});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1607 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1608 wakaba 1.1
## |body|: checked with the generic prose-content checker.  The
## presentational attributes get syntax checkers (colors and one URI) and
## carry the M12N "deprecated" status constant.
$Element->{$HTML_NS}{body} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      background => $HTMLURIAttrChecker,
      ## All remaining presentational attributes are color values.
      (map { ($_ => $HTMLColorAttrChecker) }
           qw/alink bgcolor link text vlink/),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
           qw/alink background bgcolor link text vlink/),
      lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      onload   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my (undef, undef, $state) = @_;

    ## Flag the |background| URI as referring to an embedded resource.
    $state->{uri_info}{background}{type}{embedded} = 1;
  },
};
1638    
## |section|: HTML5-status element using the generic prose-content
## checker unchanged.
$Element->{$HTML_NS}{section} = {
  (status => FEATURE_HTML5_DEFAULT),
  %HTMLProseContentChecker,
};
1643    
## |nav|: HTML5-status element using the generic prose-content checker
## unchanged.
$Element->{$HTML_NS}{nav} = {
  (status => FEATURE_HTML5_DEFAULT),
  %HTMLProseContentChecker,
};
1648    
## |article|: HTML5-status element using the generic prose-content
## checker unchanged.
$Element->{$HTML_NS}{article} = {
  (status => FEATURE_HTML5_DEFAULT),
  %HTMLProseContentChecker,
};
1653    
## |blockquote|: prose content; the |cite| attribute is checked as a URI
## and flagged as a citation reference.
$Element->{$HTML_NS}{blockquote} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLProseContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_HTML2X_RFC,
      cite    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my (undef, undef, $state) = @_;

    ## Record that the |cite| URI is used as a citation.
    $state->{uri_info}{cite}{type}{cite} = 1;
  },
};
1673    
## |aside|: HTML5-status element using the generic prose-content checker
## unchanged.
$Element->{$HTML_NS}{aside} = {
  (status => FEATURE_HTML5_DEFAULT),
  %HTMLProseContentChecker,
};
1678    
## |h1|: phrasing content.  Seeing a heading sets the |has_hn| flag, which
## the |header| definition inspects.  This definition is also the
## template that |h2|..|h6| are copied from.
$Element->{$HTML_NS}{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw/left center right justify/
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($checker) = @_;
    $checker->{flag}{has_hn} = 1;
  },
};
1698    
## |h2|..|h6| each get their own shallow copy of the |h1| definition.
for my $heading_ln (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}{$heading_ln} = { %{ $Element->{$HTML_NS}{h1} } };
}
1708 wakaba 1.1
1709 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1710    
## |header|: prose content in which |header|, |footer| and sectioning
## content are excluded, and which has to contain at least one heading
## (tracked through the |has_hn| flag set by the |hn| definitions).
$Element->{$HTML_NS}{header} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded_here = { $HTML_NS => { header => 1, footer => 1 } };
    $self->_add_minus_elements
        ($element_state, $excluded_here, $HTMLSectioningContent);

    ## Save the outer flag and start counting headings afresh.
    my $flag = $self->{flag};
    $element_state->{has_hn_original} = $flag->{has_hn};
    $flag->{has_hn} = 0;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $flag = $self->{flag};
    if (not $flag->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn');
      ## No heading inside: fall back to what the outer context had seen.
      $flag->{has_hn} = $element_state->{has_hn_original};
    }

    $HTMLProseContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
1735    
## |footer|: prose content from which nested |footer|, sectioning content
## and heading content are excluded for the duration of the element.
$Element->{$HTML_NS}{footer} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, undef, $state) = @_;
    my @excluded = (
      { $HTML_NS => { footer => 1 } },
      $HTMLSectioningContent,
      $HTMLHeadingContent,
    );
    $self->_add_minus_elements ($state, @excluded);
  },
  check_end => sub {
    my ($self, undef, $state) = @_;
    ## Undo the exclusions registered in |check_start|.
    $self->_remove_minus_elements ($state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1753    
## |address|: prose content from which |footer|, nested |address|,
## sectioning content and heading content are excluded.
$Element->{$HTML_NS}{address} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_HTML2X_RFC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($self, undef, $state) = @_;
    my @excluded = (
      { $HTML_NS => { footer => 1, address => 1 } },
      $HTMLSectioningContent,
      $HTMLHeadingContent,
    );
    $self->_add_minus_elements ($state, @excluded);
  },
  check_end => sub {
    my ($self, undef, $state) = @_;
    ## Undo the exclusions registered in |check_start|.
    $self->_remove_minus_elements ($state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1778    
## |p|: phrasing content; |align| is checked against its four keywords
## and carries the M12N "deprecated" status constant.
$Element->{$HTML_NS}{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw/left center right justify/
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
1794    
## |hr|: empty element.  No element-specific attribute syntax checkers are
## registered; the presentational attributes only get a status.
$Element->{$HTML_NS}{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
           qw/align noshade size width/),
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
};
1809    
## |br|: empty element; |clear| is checked against its four keywords.
## The status table lists attributes individually instead of including
## the common M12N attribute table.
$Element->{$HTML_NS}{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      clear => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw/left all right none/
      }),
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw/class id title/),
      clear   => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      style   => FEATURE_XHTML10_REC,
    },
  ),
  ## NOTE: Blank line MUST NOT be used for presentation purpose.
  ## (This requirement is semantic so that we cannot check.)
};
1829    
## |dialog|: children must alternate |dt|, |dd|, |dt|, |dd|, ...
## |$element_state->{phase}| names the child expected next.
$Element->{$HTML_NS}{dialog} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLChecker,
  check_start => sub {
    my (undef, undef, $state) = @_;
    $state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## phase => { child local name => [next phase, error type or none] }
    my %transition = (
      'before dt' => {
        dt => ['before dd'],
        dd => ['before dt', 'ps element missing:dt'],
      },
      'before dd' => {
        dd => ['before dt'],
        dt => ['before dd', 'ps element missing:dd'],
      },
    );

    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Explicitly permitted here; nothing to check.
    } else {
      my $by_name = $transition{$element_state->{phase}}
          or die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
      my $move = $child_nsuri eq $HTML_NS ? $by_name->{$child_ln} : undef;
      if ($move) {
        my ($next_phase, $error_type) = @$move;
        $self->{onerror}->(node => $child_el, type => $error_type)
            if defined $error_type;
        $element_state->{phase} = $next_phase;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($self, undef, $text_node, $is_significant) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $is_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## A trailing |dt| has no matching |dd|.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:dd')
        if $element_state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  },
};
1886    
## |pre|: phrasing content; |width| is checked as a non-negative integer
## with an always-true extra condition.
$Element->{$HTML_NS}{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      width   => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
1900    
## |ol|: only |li| children (plus anything in |plus_elements|) are
## allowed, and no significant text.  |ul| copies this definition.
$Element->{$HTML_NS}{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact  => $GetHTMLBooleanAttrChecker->('compact'),
      reversed => $GetHTMLBooleanAttrChecker->('reversed'),
      start    => $HTMLIntegerAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align    => FEATURE_HTML2X_RFC,
      compact  => FEATURE_M12N10_REC_DEPRECATED,
      lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      reversed => FEATURE_HTML5_DEFAULT,
      sdaform  => FEATURE_HTML20_RFC,
      #start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
      start    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type     => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not ($self->{plus_elements}{$child_nsuri}{$child_ln} or
                  ($child_nsuri eq $HTML_NS and $child_ln eq 'li'))) {
      ## Neither explicitly permitted nor an |li|.
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, undef, $text_node, $is_significant) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $is_significant;
  },
};
1942    
## |ul|: same content checks as |ol| (copied from it), with its own status
## and attribute tables.
$Element->{$HTML_NS}{ul} = {
  %{ $Element->{$HTML_NS}{ol} },
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      type    => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
1958    
## |dir|: same content checks as |ul| (copied from it); the element itself
## carries the M12N "deprecated" status constant.
$Element->{$HTML_NS}{dir} = {
  ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
  %{ $Element->{$HTML_NS}{ul} },
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
};
1975    
## |li|: prose content, or phrasing content while the |in_menu| flag is
## set.  The |value| attribute is an integer; it is additionally reported
## with the 'unsupported' level when the parent element is not an HTML
## |ol|.
$Element->{$HTML_NS}{li} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      value => sub {
        my ($self, $attr) = @_;
        my $list_el = $attr->owner_element->manakai_parent_element;
        if (defined $list_el) {
          my $list_ns = $list_el->namespace_uri;
          my $list_ln = $list_el->manakai_local_name;
          my $in_ol = (defined $list_ns and $list_ns eq $HTML_NS and
                       $list_ln eq 'ol');
          $self->{onerror}->(node => $attr, level => 'unsupported',
                             type => 'attribute')
              unless $in_ol;
        }
        $HTMLIntegerAttrChecker->($self, $attr);
      }, ## TODO: test
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_HTML2X_RFC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      type    => FEATURE_M12N10_REC_DEPRECATED,
      #value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
      #    FEATURE_M12N10_REC_DEPRECATED,
      value   => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
                 FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($self) = @_;
    ## Delegate to the phrasing or prose checker with the same arguments.
    my $delegate = $self->{flag}{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
    $delegate->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self) = @_;
    my $delegate = $self->{flag}{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
    $delegate->{check_child_text}->(@_);
  },
};
2024    
## |dl|: groups of one or more |dt| followed by one or more |dd|.
## |$element_state->{phase}| is one of 'before dt' (nothing seen yet),
## 'in dts' and 'in dds'.
$Element->{$HTML_NS}{dl} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
      type    => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_start => sub {
    my (undef, undef, $state) = @_;
    $state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Explicitly permitted here; nothing to check.
    } else {
      my $phase = $element_state->{phase};
      die "check_child_element: Bad |dl| phase: $element_state->{phase}"
          unless $phase eq 'in dds' or $phase eq 'in dts' or
                 $phase eq 'before dt';

      my $is_html = $child_nsuri eq $HTML_NS;
      if ($is_html and $child_ln eq 'dt') {
        ## A |dt| is acceptable in every phase and starts/continues a
        ## run of terms.
        $element_state->{phase} = 'in dts';
      } elsif ($is_html and $child_ln eq 'dd') {
        ## A |dd| before any |dt| is an error; afterwards it
        ## starts/continues a run of descriptions.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing:dt')
            if $phase eq 'before dt';
        $element_state->{phase} = 'in dds';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($self, undef, $text_node, $is_significant) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $is_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## The last run of |dt|s has no |dd|.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:dd')
        if $element_state->{phase} eq 'in dts';

    $HTMLChecker{check_end}->(@_);
  },
};
2098    
## |dt|: phrasing content; no element-specific attribute syntax checkers.
$Element->{$HTML_NS}{dt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2109    
## |dd|: prose content; no element-specific attribute syntax checkers.
$Element->{$HTML_NS}{dd} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2120    
## |a|: phrasing content from which interactive content is excluded.
##
## |check_attrs| is hand-written (instead of using |$GetHTMLAttrsChecker|)
## because the hyperlink attributes |target|, |ping|, |rel|, |media|,
## |hreflang| and |type| are only allowed when |href| is present.  It also
## maintains the |in_a_href| flag, which |check_end| restores.
$Element->{$HTML_NS}->{a} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Neither table depends on the attribute being examined, so both are
    ## built once per element rather than once per attribute.

    ## Element-specific syntax checkers for null-namespace attributes.
    my %local_checker = (
      accesskey => $HTMLAccesskeyAttrChecker,
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    );

    ## Standardization status of every known attribute.
    my %attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    );

    ## Attribute nodes that matched |%local_checker|, by local name.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        $checker = $local_checker{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      ## Fall back to the per-namespace checker, then to the namespace's
      ## wildcard ('') checker.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      my $status = $attr_status{$attr_ln};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## Unknown null-namespace attribute: nothing is reported here;
        ## |_attr_status_info| below receives the missing status.
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status);
      }
    }

    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## Without |href| the other hyperlink attributes are not allowed.
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed');
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    ## Clear the flag unless it was already set when this element started.
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2223    
## |q|: phrasing content; the |cite| attribute is checked as a URI and
## flagged as a citation reference.
$Element->{$HTML_NS}{q} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    },
  ),
  check_start => sub {
    my (undef, undef, $state) = @_;

    ## Record that the |cite| URI is used as a citation.
    $state->{uri_info}{cite}{type}{cite} = 1;
  },
};
2243    
## |cite|: phrasing content; no element-specific attribute syntax
## checkers.
$Element->{$HTML_NS}{cite} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2254    
## |em|: phrasing content; no element-specific attribute syntax checkers.
$Element->{$HTML_NS}{em} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2265    
## |strong|: phrasing content; no element-specific attribute syntax
## checkers.
$Element->{$HTML_NS}{strong} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2276    
## |small|: phrasing content; no element-specific attribute syntax
## checkers.
$Element->{$HTML_NS}{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2286    
2287 wakaba 1.51 $Element->{$HTML_NS}->{big} = {
2288     %HTMLPhrasingContentChecker,
2289     status => FEATURE_M12N10_REC,
2290     check_attrs => $GetHTMLAttrsChecker->({}, {
2291     %HTMLAttrStatus,
2292     %HTMLM12NCommonAttrStatus,
2293     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2294     }),
2295     };
2296    
2297 wakaba 1.38 $Element->{$HTML_NS}->{mark} = {
2298 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
2299 wakaba 1.40 %HTMLPhrasingContentChecker,
2300 wakaba 1.1 };
2301    
## |dfn|: phrasing content that must not contain a nested |dfn|.  On start
## the defined term is computed and registered in |$self->{term}| so that
## duplicate definitions of the same term can be reported.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $node = $item->{node};

    ## Term selection, in order of preference:
    ##   1. the |title| attribute of the |dfn| itself;
    ##   2. the |title| of an HTML |abbr| child, provided that it is the
    ##      only element child and all text children are inter-element
    ##      whitespace;
    ##   3. the text content of the |dfn|.
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      my $seen_element = 0;
      CHILD: for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          if ($seen_element) {
            ## More than one element child: rule 2 does not apply.
            undef $term;
            last CHILD;
          }
          $seen_element = 1;

          if ($child->manakai_local_name eq 'abbr') {
            my $nsuri = $child->namespace_uri;
            if (defined $nsuri and $nsuri eq $HTML_NS) {
              my $attr = $child->get_attribute_node_ns (undef, 'title');
              $term = $attr->value if $attr;
            }
          }

          ## The first element child is not an HTML |abbr| with |title|.
          ## A later |abbr| must not be picked up (it would not be the
          ## only element child), so fall back to the text content.
          last CHILD unless defined $term;
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next CHILD
              if $child->data =~ /\A[\x09-\x0D\x20]+\z/; # Inter-element whitespace
          undef $term;
          last CHILD;
        }
      }
      $term = $node->text_content unless defined $term;
    }

    if ($self->{term}->{$term}) {
      $self->{onerror}->(node => $node, type => 'duplicate term');
      push @{$self->{term}->{$term}}, $node;
    } else {
      $self->{term}->{$term} = [$node];
    }
    ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
    ## has |title|.
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2360    
## |abbr| and |acronym| accept the same attributes; only the element
## status differs (|acronym| has no HTML5 flag).
$Element->{$HTML_NS}->{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};

$Element->{$HTML_NS}->{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2380    
## |time|: phrasing content whose |datetime| attribute -- or, when that
## attribute is absent, whose text content -- must be a date-and-time or a
## time-only string.  The whole validation is done in |check_end|.
$Element->{$HTML_NS}->{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # checked in |check_end|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_DEFAULT,
  }),
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Pick the string to parse, the node errors are reported against,
    ## and the kind of white space that may be skipped in that mode.
    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;
    my $reg_sp;
    my $input_node;
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09-\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{Zs}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    ## NOTE: |$reg_sp| is interpolated below so that the skipped white
    ## space really depends on the mode selected above.  (It used to be
    ## computed but ignored in favor of a hard-coded character class.)
    my $hour;
    my $minute;
    my $second;
    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+)                                # 1
      (?>
        -([0-9]+)                             # 2
        -([0-9]+)                             # 3
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+)                              # 4
        :([0-9]+)                             # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*)     # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+)               # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+)                             # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*)     # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      ## Copy the captures right away so that no later match can clobber
      ## them.  |$first| is the year in the date form and the hour in the
      ## time-only form.
      my ($first, $month, $day) = ($1, $2, $3);
      my ($dt_hour, $dt_minute, $dt_second) = ($4, $5, $6);
      my ($tz_hour, $tz_minute) = ($7, $8);
      my ($t_minute, $t_second) = ($9, $10);

      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        if (length $first != 4 or length $month != 2 or length $day != 2 or
            length $dt_hour != 2 or length $dt_minute != 2) {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        if (1 <= $month and $month <= 12) {
          ## Index 0 is a placeholder; February allows 29 here and the
          ## leap-year rule is applied separately below.
          $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
              if $day < 1 or
                  $day > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$month];
          $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
              if $month == 2 and $day == 29 and
                  not ($first % 400 == 0 or
                       ($first % 4 == 0 and $first % 100 != 0));
        } else {
          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          if (length $tz_hour != 2 or length $tz_minute != 2) {
            $self->{onerror}->(node => $input_node,
                               type => 'dateortime:syntax error');
          }

          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad timezone hour')
              if $tz_hour > 23;
          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad timezone minute')
              if $tz_minute > 59;
        }
      } else { ## hh:mm
        if (length $first != 2 or length $t_minute != 2) {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        ($hour, $minute, $second) = ($first, $t_minute, $t_second);
      }

      $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
          if $hour > 23;
      $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
          if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: Integer part of second don't have to have length of two.

        ## A seconds component without an integer part is not allowed.
        if (substr ($second, 0, 1) eq '.') {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
            if $second >= 60;
      }
    } else {
      $self->{onerror}->(node => $input_node,
                         type => 'dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2517    
## |meter|: every numeric attribute is checked as a floating point number
## with no additional range constraint (the callback always returns true).
## TODO: "The recommended way of giving the value is to include it as contents of the element"
$Element->{$HTML_NS}->{meter} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(+{
    ## One separate checker per attribute.
    map { ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 })) }
        qw(value min low high max optimum)
  }, +{
    %HTMLAttrStatus,
    map { ($_ => FEATURE_HTML5_DEFAULT) }
        qw(high low max min optimum value)
  }),
};

## |progress|: |value| must be zero or greater, |max| must be greater
## than zero.
## TODO: recommended to use content
$Element->{$HTML_NS}->{progress} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    value => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] >= 0 }),
    max => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    max => FEATURE_HTML5_DEFAULT,
    value => FEATURE_HTML5_DEFAULT,
  }),
};
2551    
## |code|, |var|, |samp| and |kbd| have identical definitions.  Each gets
## its own hash and its own attribute checker.
for my $local_name (qw(code var samp kbd)) {
  $Element->{$HTML_NS}->{$local_name} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    }),
  };
}

## |sub|; |sup| is an alias pointing at the very same definition hash.
$Element->{$HTML_NS}->{sub} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdapref => FEATURE_HTML2X_RFC,
  }),
};
$Element->{$HTML_NS}->{sup} = $Element->{$HTML_NS}->{sub};

## |span|: additionally knows the reserved HTML4 data binding attributes.
$Element->{$HTML_NS}->{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML2X_RFC,
  }),
};

## |i|; |b| is an alias pointing at the very same definition hash.
$Element->{$HTML_NS}->{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};
$Element->{$HTML_NS}->{b} = $Element->{$HTML_NS}->{i};

## |tt|: same attributes as |i|, but the element status has no HTML5 flag.
$Element->{$HTML_NS}->{tt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};

## |s| has a deprecated element status; |strike| and |u| are aliases
## pointing at the very same definition hash.
$Element->{$HTML_NS}->{s} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
$Element->{$HTML_NS}->{strike} = $Element->{$HTML_NS}->{s};
$Element->{$HTML_NS}->{u} = $Element->{$HTML_NS}->{s};

2660    
## |bdo|: phrasing content.  After the ordinary attribute checks, a
## missing |dir| attribute is reported.
$Element->{$HTML_NS}->{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## The attribute checker is constructed on every invocation.
    my $check_common = $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      style => FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    });
    $check_common->($self, $item, $element_state);

    my $el = $item->{node};
    $self->{onerror}->(node => $el, type => 'attribute missing:dir')
        unless $el->has_attribute_ns (undef, 'dir');
  },
  ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
};
2684    
2685 wakaba 1.29 =pod
2686    
2687     ## TODO:
2688    
2689     +
2690     + <p>Partly because of the confusion described above, authors are
2691     + strongly recommended to always mark up all paragraphs with the
2692     + <code>p</code> element, and to not have any <code>ins</code> or
2693     + <code>del</code> elements that cross across any <span
2694     + title="paragraph">implied paragraphs</span>.</p>
2695     +
2696     (An informative note)
2697    
2698     <p><code>ins</code> elements should not cross <span
2699     + title="paragraph">implied paragraph</span> boundaries.</p>
2700     (normative)
2701    
2702     + <p><code>del</code> elements should not cross <span
2703     + title="paragraph">implied paragraph</span> boundaries.</p>
2704     (normative)
2705    
2706     =cut
2707    
## |ins|: built on the transparent checker.  |cite| is checked as a URI
## and |datetime| with the shared datetime attribute checker.
$Element->{$HTML_NS}->{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $HTMLDatetimeAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Flag the URI given by |cite| with the |cite| type.
    my $cite_info = $element_state->{uri_info}->{cite};
    $cite_info->{type}->{cite} = 1;
  },
};
2727    
## |del|: same attributes as |ins|, but with its own |check_end|, which
## reports a |del| that is neither significant nor transparent.
$Element->{$HTML_NS}->{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $HTMLDatetimeAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Flag the URI given by |cite| with the |cite| type.
    my $cite_info = $element_state->{uri_info}->{cite};
    $cite_info->{type}->{cite} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
};
2759    
## |figure|: prose content with at most one |legend|, which has to be
## either the first or the last piece of content.
##
## State flags kept in the element state:
##   has_legend_at_first - a |legend| was seen before any other content
##   has_legend          - the most recent |legend| seen after other content
##   has_non_legend      - non-|legend| content was seen since the last
##                         |legend| (or since the start)
$Element->{$HTML_NS}->{figure} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_FD,
  ## NOTE: legend, Prose | Prose, legend?
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## Decide which |legend| (if any) is misplaced and update the flags.
      my $misplaced;
      if ($state->{has_legend_at_first}) {
        ## There is a leading |legend| already; this one is extra.
        $misplaced = $child_el;
      } elsif ($state->{has_legend}) {
        ## The earlier trailing candidate turned out not to be last.
        $misplaced = $state->{has_legend};
        $state->{has_legend} = $child_el;
      } elsif ($state->{has_non_legend}) {
        ## First |legend| after other content: trailing candidate.
        $state->{has_legend} = $child_el;
      } else {
        $state->{has_legend_at_first} = 1;
      }
      if (defined $misplaced) {
        $self->{onerror}->(node => $misplaced,
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
      }
      delete $state->{has_non_legend};
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $state) = @_;
    $state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;

    ## A trailing |legend| candidate followed by more content is an error.
    if (not $state->{has_legend_at_first} and
        $state->{has_legend} and
        $state->{has_non_legend}) {
      $self->{onerror}->(node => $state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
2818 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2819 wakaba 1.1
## |img|: empty element.  |alt| is recommended (reported at the "should"
## level when missing), |src| is required, and |ismap| is only allowed
## while the |in_a_href| flag is set.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_ismap = sub {
      my ($self, $attr, $parent_item) = @_;
      unless ($self->{flag}->{in_a_href}) {
        $self->{onerror}->(node => $attr,
                           type => 'attribute not allowed:ismap',
                           level => $self->{must_level});
      }
      $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
    };

    my $check_attrs = $GetHTMLAttrsChecker->({
      alt => sub { }, ## NOTE: No syntactical requirement
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ismap => $check_ismap,
      ## TODO: height
      ## TODO: width
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    });
    $check_attrs->($self, $item, $element_state);

    my $el = $item->{node};
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:alt',
                       level => $self->{should_level})
        unless $el->has_attribute_ns (undef, 'alt');
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:src')
        unless $el->has_attribute_ns (undef, 'src');

    ## Classify the URI-valued attributes.
    my $uri_info = $element_state->{uri_info};
    $uri_info->{$_}->{type}->{embedded} = 1 for qw(src lowsrc dynsrc);
    $uri_info->{longdesc}->{type}->{cite} = 1;
  },
};
2874    
## |iframe|: built on the text checker; |src| is checked as a URI and
## flagged as an embedded resource.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->(
    {
      src => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      frameborder => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      longdesc => FEATURE_M12N10_REC,
      marginheight => FEATURE_M12N10_REC,
      marginwidth => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      scrolling => FEATURE_M12N10_REC,
      src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $src_info = $element_state->{uri_info}->{src};
    $src_info->{type}->{embedded} = 1;
  },
};

2904    
## |embed|: empty element.  Any attribute in the null namespace is
## accepted; |src| (URI, required) and |type| (Internet media type) have
## dedicated checkers.  Namespaced attributes are dispatched through
## |$AttrChecker|.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status table for null-namespace attributes.  It does not depend
    ## on the attribute being examined, so it is built once per element
    ## rather than once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      height => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
      width => FEATURE_HTML5_DEFAULT,
    };

    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } else {
          ## TODO: height
          ## TODO: width
          $checker = $HTMLAttrChecker->{$attr_ln}
              || sub { }; ## NOTE: Any local attribute is ok.
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      my $status = $attr_status->{$attr_ln};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      ## Status information is only reported for null-namespace attributes.
      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status) if $status;
      }
    }

    unless ($has_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
2963    
2964 wakaba 1.49 ## TODO:
2965     ## {applet} FEATURE_M12N10_REC_DEPRECATED
2966     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
2967    
## |object|: zero or more |param| children followed by transparent
## (prose) content.  At least one of |data| and |type| has to be given.
##
## State flag kept in the element state:
##   has_non_param - something other than a |param| has been seen, so any
##                   further |param| child is misplaced
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      data => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ## TODO: width
      ## TODO: height
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      data => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_M12N10_REC,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## Reported only when both |data| and |type| are absent.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type');
      }
    }

    ## Classify the URI-valued attributes.
    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  ## NOTE: param*, transparent (Prose)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## A disallowed element still counts as non-|param| content.
      ## (This used to set |has_non_legend|, a flag that belongs to
      ## |figure| and is never read for |object|.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| is only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate significance to the real parent's state.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
3059 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
3060     ## What about |<section><object data><style scoped></style>x</object></section>|?
3061     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
3062 wakaba 1.1
## |param|: empty element; both |name| and |value| are required.  Neither
## attribute has a syntactical constraint (the checkers are no-ops).
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_attrs = $GetHTMLAttrsChecker->({
      name => sub { },
      value => sub { },
    }, {
      %HTMLAttrStatus,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC,
      value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valuetype => FEATURE_M12N10_REC,
    });
    $check_attrs->(@_);

    my $el = $item->{node};
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:name')
        unless $el->has_attribute_ns (undef, 'name');
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:value')
        unless $el->has_attribute_ns (undef, 'value');

    $element_state->{uri_info}->{value}->{type}->{resource} = 1;
  },
};
3091    
## |video|: transparent content model, optionally preceded by |source|
## children when the element itself has no |src| attribute.
##
## Element state used by the callbacks below:
##   allow_source - true while a |source| child is still acceptable.
##   has_source   - 1 once a |source| child has been seen; -1 when a
##                  |source| is required (no |src|) but not yet seen.
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    poster => $HTMLURIAttrChecker,
    ## TODO: width, height
  }, {
    %HTMLAttrStatus,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    end => FEATURE_HTML5_LC,
    height => FEATURE_HTML5_LC,
    loopend => FEATURE_HTML5_LC,
    loopstart => FEATURE_HTML5_LC,
    playcount => FEATURE_HTML5_LC,
    poster => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    start => FEATURE_HTML5_LC,
    width => FEATURE_HTML5_LC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |source| children are only acceptable when there is no |src|.
    my $has_src = $item->{node}->has_attribute_ns (undef, 'src');
    $element_state->{allow_source} = not $has_src;

    ## NOTE: |has_source| might already be set true by |check_element|,
    ## in which case it is left untouched.
    $element_state->{has_source} ||= ($has_src ? 0 : -1);

    for my $uri_attr (qw/src poster/) {
      $element_state->{uri_info}->{$uri_attr}->{type}->{embedded} = 1;
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_source = ($child_nsuri eq $HTML_NS and $child_ln eq 'source');
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; no further check.
    } elsif ($is_source) {
      ## A |source| is an error once other content has been seen or
      ## when the element has a |src| attribute.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:prose',
                         level => $self->{must_level})
          unless $element_state->{allow_source};
      $element_state->{has_source} = 1;
    } else {
      ## Any other child ends the run of leading |source| elements.
      delete $element_state->{allow_source};
      $HTMLProseContentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text also ends the run of leading |source| elements.
    delete $element_state->{allow_source} if $has_significant;
    $HTMLProseContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## -1 means: a |source| child was required but none was found.
    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:source',
                         level => $self->{must_level});
    }

    ## The remaining end-of-element checks are shared with |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
3168    
## |audio|: starts from a shallow copy of the |video| definition and
## replaces the attribute checker, whose tables omit the |poster|,
## |width| and |height| entries that |video| has.
$Element->{$HTML_NS}->{audio} = {
  %{$Element->{$HTML_NS}->{video}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
  }, {
    %HTMLAttrStatus,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    end => FEATURE_HTML5_LC,
    loopend => FEATURE_HTML5_LC,
    loopstart => FEATURE_HTML5_LC,
    playcount => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    start => FEATURE_HTML5_LC,
  }),
};
3191    
## |source|: an empty element; its |src| attribute is mandatory.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checker = {
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      media => $HTMLMQAttrChecker,
    };
    my $attr_status = {
      %HTMLAttrStatus,
      media => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
    };
    $GetHTMLAttrsChecker->($attr_checker, $attr_status)->(@_);

    my $node = $item->{node};
    $self->{onerror}->(node => $node,
                       type => 'attribute missing:src')
        unless $node->has_attribute_ns (undef, 'src');

    ## Mark the |src| URI as an "embedded" reference.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
3215    
## |canvas|: transparent content model.  |height| and |width| are
## non-negative integers; the extra predicate passed to the checker
## factory always returns true, i.e. adds no further restriction.
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    },
    {
      %HTMLAttrStatus,
      height => FEATURE_HTML5_LC,
      width => FEATURE_HTML5_LC,
    },
  ),
};
3228    
## |map|: prose content.  The |id| attribute is mandatory and is also
## recorded in |$self->{map}|.  While inside a |map| element the flag
## |$self->{flag}->{in_map}| is set; the |area| checker reads it.
$Element->{$HTML_NS}->{map} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_id;  # set by the |id| checker below when it is invoked

    ## NOTE: Same as the global |id=""| check, plus registration of the
    ## attribute in |$self->{map}|.
    my $check_id = sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      if (length $value) {
        my $same_id_attrs = $self->{id}->{$value};
        if ($same_id_attrs) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @$same_id_attrs, $attr;
        } else {
          $self->{id}->{$value} = [$attr];
        }
      } else {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      }
      $self->{onerror}->(node => $attr, type => 'space in ID')
          if $value =~ /[\x09-\x0D\x20]/;
      ## The first |map| seen with a given ID wins.
      $self->{map}->{$value} ||= $attr;
      $has_id = 1;
    };

    my $attr_status = {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    };
    $GetHTMLAttrsChecker->({id => $check_id}, $attr_status)->(@_);

    unless ($has_id) {
      $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id');
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## Remember the outer value so that nested |map|s restore it properly.
    $element_state->{in_map_original} = $self->{flag}->{in_map};
    $self->{flag}->{in_map} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Clear the flag only if it was not already set by an ancestor.
    unless ($element_state->{in_map_original}) {
      delete $self->{flag}->{in_map};
    }
    $HTMLProseContentChecker{check_end}->(@_);
  },
};
3290    
## |area|: an empty element describing one region of an image map.
##
## check_attrs validates each attribute and then the combinations:
##   - with |href|: |alt| is required;
##   - without |href|: |target|, |ping|, |rel|, |media|, |hreflang|,
##     |type| and |alt| are not allowed;
##   - |coords| must match the (possibly defaulted) |shape|.
## check_start reports an |area| that has a parent element but is not
## inside a |map|.
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my %attr;    # area-specific null-namespace attributes, by local name
    my $coords;  # array ref of integers, set iff |coords| is well-formed

    ## The two tables below do not depend on the attribute being
    ## examined, so they are built once per element rather than once
    ## per attribute.
    my $area_attr_checker = {
      accesskey => $HTMLAccesskeyAttrChecker,
      alt => sub { },
          ## NOTE: |alt| value has no conformance criteria.
      shape => $GetHTMLEnumeratedAttrChecker->({
        circ => -1, circle => 1,
        default => 1,
        poly => 1, polygon => -1,
        rect => 1, rectangle => -1,
      }),
      coords => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        ## A comma-separated list of (optionally negative) integers.
        if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
          $coords = [split /,/, $value];
        } else {
          $self->{onerror}->(node => $attr,
                             type => 'coords:syntax error');
        }
      },
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };
    my $area_attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      coords => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      nohref => FEATURE_M12N10_REC,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT,
      shape => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      ## Checker lookup order: area-specific, then HTML global (both
      ## null namespace only), then namespace-specific, then the
      ## namespace's catch-all entry.
      my $checker;
      if ($attr_ns eq '') {
        $checker = $area_attr_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      my $status = $area_attr_status->{$attr_ln};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## No checker and no known status: nothing is reported here.
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status);
      }
    }

    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      unless (defined $attr{alt}) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:alt');
      }
    } else {
      for (qw/target ping rel media hreflang type alt/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed');
        }
      }
    }

    ## Normalize the shape keyword; a missing or unknown value is
    ## treated as a rectangle.
    my $shape = 'rectangle';
    if (defined $attr{shape}) {
      $shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      }->{lc $attr{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    my $coords_attr = $attr{coords};
    if ($shape eq 'default') {
      ## The default shape covers everything and takes no coordinates.
      if (defined $coords_attr) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'attribute not allowed');
      }
    } elsif (not defined $coords_attr) {
      ## Every other shape requires |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:coords');
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      ## Three values; the third must not be negative.
      if (@$coords == 3) {
        if ($coords->[2] < 0) {
          $self->{onerror}->(node => $coords_attr,
                             type => 'coords:out of range:2');
        }
      } else {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number:3:'.@$coords);
      }
    } elsif ($shape eq 'polygon') {
      ## At least six values, and an even number of them.
      if (@$coords >= 6) {
        unless (@$coords % 2 == 0) {
          $self->{onerror}->(node => $coords_attr,
                             type => 'coords:number:even:'.@$coords);
        }
      } else {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number:>=6:'.@$coords);
      }
    } elsif ($shape eq 'rectangle') {
      ## Four values, with [0] < [2] and [1] < [3].
      if (@$coords == 4) {
        unless ($coords->[0] < $coords->[2]) {
          $self->{onerror}->(node => $coords_attr,
                             type => 'coords:out of range:0');
        }
        unless ($coords->[1] < $coords->[3]) {
          $self->{onerror}->(node => $coords_attr,
                             type => 'coords:out of range:1');
        }
      } else {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number:4:'.@$coords);
      }
    }

    ## Mark the |href| URI as a "hyperlink" reference.
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## An |area| with a parent element must be inside a |map|; a
    ## parentless |area| is not reported.
    unless ($self->{flag}->{in_map} or
            not $item->{node}->manakai_parent_element) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element not allowed:area',
                         level => $self->{must_level});
    }
  },
};
3483    
## Phase transitions for the children of |table|: current phase =>
## local name of an HTML-namespace child => next phase.  A child with
## no entry for the current phase is not allowed there.
##
## A |tfoot| placed before the table body leads to the phase
## 'after leading tfoot', from which both |tbody| and |tr| are still
## accepted.  (Sending it straight to 'in tbodys' would wrongly reject
## |<table><tfoot/><tr/></table>|.)  The "only one |tfoot|" rule is
## enforced separately through |$element_state->{has_tfoot}|.
my $HTMLTablePhaseTransition = {
  'before caption' => {
    caption => 'in colgroup',
    colgroup => 'in colgroup',
    thead => 'after thead',
    tfoot => 'after leading tfoot',
    tbody => 'in tbodys',
    tr => 'in trs',
  },
  'in colgroup' => {
    colgroup => 'in colgroup',
    thead => 'after thead',
    tfoot => 'after leading tfoot',
    tbody => 'in tbodys',
    tr => 'in trs',
  },
  'after thead' => {
    tfoot => 'after leading tfoot',
    tbody => 'in tbodys',
    tr => 'in trs',
  },
  'after leading tfoot' => {
    tbody => 'in tbodys',
    tr => 'in trs',
  },
  'in tbodys' => {
    tbody => 'in tbodys',
    tfoot => 'after tfoot',
  },
  'in trs' => {
    tr => 'in trs',
    tfoot => 'after tfoot',
  },
  'after tfoot' => {},
};

## |table|: children must follow the order encoded in
## |$HTMLTablePhaseTransition|; significant text is not allowed.  At
## the end of the element the table model is checked by
## |Whatpm::HTMLTable| and the node is recorded in
## |$self->{return}->{table}|.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    cols => FEATURE_RFC1942,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';

    ## Mark the |datasrc| URI as a "resource" reference.
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; the phase is left unchanged.
    } else {
      my $phase = $element_state->{phase};
      my $transition = $HTMLTablePhaseTransition->{$phase}
          or die "check_child_element: Bad |table| phase: $phase";

      ## Only HTML-namespace children can ever be allowed.
      my $next_phase = $child_nsuri eq $HTML_NS
          ? $transition->{$child_ln} : undef;

      ## There can only be one |tfoot| child in total.
      if (defined $next_phase and $child_ln eq 'tfoot') {
        if ($element_state->{has_tfoot}) {
          undef $next_phase;
        } else {
          $element_state->{has_tfoot} = 1;
        }
      }

      if (defined $next_phase) {
        $element_state->{phase} = $next_phase;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors, reported with a 'table:' prefix on the type.
    require Whatpm::HTMLTable;
    Whatpm::HTMLTable->form_table ($item->{node}, sub {
      my %opt = @_;
      $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
    });
    push @{$self->{return}->{table}}, $item->{node};

    $HTMLChecker{check_end}->(@_);
  },
};
3610    
## |caption|: phrasing content.  No attribute-specific checkers; only
## the status table is customized.
$Element->{$HTML_NS}->{caption} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
3621    
## |colgroup|: only |col| children (plus anything in the "plus" set)
## are allowed; significant text is an error.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## The predicate additionally requires the integer to be positive.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    valign => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_col = ($child_nsuri eq $HTML_NS and $child_ln eq 'col');
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here.
    } elsif ($is_col) {
      ## The one regular child element of |colgroup|.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
};
3664    
## |col|: an empty element.  |span| is checked as a non-negative
## integer with an extra predicate requiring it to be positive.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
};
3682    
## |tbody|: only |tr| children (plus anything in the "plus" set) are
## allowed, significant text is an error, and at least one |tr| child
## is required.  |thead| and |tfoot| reuse this definition.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    valign => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_row = ($child_nsuri eq $HTML_NS and $child_ln eq 'tr');
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; does not count as a row.
    } elsif ($is_row) {
      $element_state->{has_tr} = 1;
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## A row group without any |tr| child is reported.
    if (not $element_state->{has_tr}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:tr');
    }

    $HTMLChecker{check_end}->(@_);
  },
};
3726    
## |thead|: a shallow copy of the |tbody| definition, so it shares the
## same status, attribute checker and content callbacks.
$Element->{$HTML_NS}->{thead} = {%{$Element->{$HTML_NS}->{tbody}}};
3730    
## |tfoot|: a shallow copy of the |tbody| definition, so it shares the
## same status, attribute checker and content callbacks.
$Element->{$HTML_NS}->{tfoot} = {%{$Element->{$HTML_NS}->{tbody}}};
3734    
## |tr|: only |td| and |th| children (plus anything in the "plus" set)
## are allowed, significant text is an error, and at least one cell is
## required.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    valign => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_cell = ($child_nsuri eq $HTML_NS and
                   ($child_ln eq 'td' or $child_ln eq 'th'));
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; does not count as a cell.
    } elsif ($is_cell) {
      $element_state->{has_cell} = 1;
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## A row without any |td| or |th| child is reported.
    if (not $element_state->{has_cell}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:td|th');
    }

    $HTMLChecker{check_end}->(@_);
  },
};
3780    
## |td|: prose content.  |colspan| and |rowspan| are checked as
## non-negative integers with an extra predicate requiring them to be
## positive.
$Element->{$HTML_NS}->{td} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      abbr => FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC,
      axis => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      headers => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      nowrap => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scope => FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
3807    
## |th|: phrasing content.  In addition to the |colspan|/|rowspan|
## checks that |td| has, |scope| is checked against the enumerated
## keywords |row|, |col|, |rowgroup| and |colgroup|.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->({
        row => 1, col => 1, rowgroup => 1, colgroup => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      abbr => FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC,
      axis => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      headers => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      nowrap => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scope => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
3836    
## Placeholder attribute checker for attributes whose value check is
## not implemented: it reports the attribute at the 'unsupported' level
## and performs no validation.
my $AttrCheckerNotImplemented = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr, level => 'unsupported',
                type => 'attribute');
  $self->{onerror}->(@report);
};
3842    
## |form| element definition.  Content-model callbacks come from
## %HTMLProseContentChecker.
## NOTE: Flow* [WF2]
## TODO: form in form is allowed in XML [WF2]
## TODO: Tests
## TODO: Tests for <nest/> in <form>
$Element->{$HTML_NS}{form} = {
  %HTMLProseContentChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
      'accept-charset' => $AttrCheckerNotImplemented, ## TODO: Charsets
      ## TODO: "User agent behavior for a value other than HTTP URI is
      ## undefined" [HTML4]
      action => $HTMLURIAttrChecker,
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      ## TODO: "multipart/form-data" should be used when type=file is
      ## used [HTML4]
      ## TODO: MUST NOT parameter [WF2]
      enctype => $HTMLIMTAttrChecker,
      ## NOTE: "get" SHOULD be used for idempotent submission, "post"
      ## SHOULD be used otherwise [HTML4].  This cannot be tested.
      method => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw(get post put delete)
      }),
      ## CDATA in HTML4, so any value is accepted.
      ## TODO: must be same as |id| (informative!) [XHTML10]
      name => sub { },
      onreceived => $HTMLEventHandlerAttrChecker,
      replace => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw(document values)
      }),
      target => $HTMLTargetAttrChecker,
      ## TODO: Warn for combination whose behavior is not defined.
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_WF2 | FEATURE_M12N10_REC) }
           qw(accept action enctype method)),
      (map { ($_ => FEATURE_M12N10_REC) } qw(accept-charset target)),
      (map { ($_ => FEATURE_WF2) } qw(data onreceived replace)),
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw(onreset onsubmit)),
      (map { ($_ => FEATURE_HTML20_RFC) } qw(sdapref sdasuff)),
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  ## Flags, in the per-element state, how the URI-valued attributes of
  ## this element are used: |action| as an action, |data| as a resource.
  check_start => sub {
    my ($self, $item, $state) = @_;

    $state->{uri_info}{action}{type}{action} = 1;
    $state->{uri_info}{data}{type}{resource} = 1;
  },
};
3891    
## |fieldset| element definition.  Content-model callbacks come from
## %HTMLProseContentChecker.
## NOTE: legend, %Flow;
## TODO: legend
## TODO: Tests
## TODO: Tests for <nest/> in <fieldset>
$Element->{$HTML_NS}{fieldset} = {
  %HTMLProseContentChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      ## TODO: form [WF2]
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_WF2) } qw(disabled form)),
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
3908    
## |input| element definition.  The element MUST be empty [WF2], so the
## content-model callbacks come from %HTMLEmptyChecker.
## TODO: Tests
## TODO: Tests for <nest/> in <input>
$Element->{$HTML_NS}->{input} = {
  %HTMLEmptyChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Attribute name => value checker.
    accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
    accesskey => $HTMLAccesskeyAttrChecker,
    action => $HTMLURIAttrChecker,
    align => $GetHTMLEnumeratedAttrChecker->({
      top => 1, middle => 1, bottom => 1, left => 1, right => 1,
    }),
    ## NOTE: Text [M12N], so any value is accepted.
    ## TODO: |alt| should be provided for |type=image| [HTML4]
    ## NOTE: HTML4 has a "should" for accessibility, which cannot be
    ## tested here.
    alt => sub {},
    autocomplete => $GetHTMLEnumeratedAttrChecker->({on => 1, off => 1}),
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    enctype => $HTMLIMTAttrChecker,
    ## TODO: form [WF2]
    ## TODO: inputmode [WF2]
    ismap => $GetHTMLBooleanAttrChecker->('ismap'),
    ## TODO: list [WF2]
    ## TODO: max [WF2]
    maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    ## TODO: min [WF2]
    name => sub {}, ## NOTE: CDATA [M12N]
    readonly => $GetHTMLBooleanAttrChecker->('readonly'),
    replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
    required => $GetHTMLBooleanAttrChecker->('required'),
    size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    src => $HTMLURIAttrChecker,
    ## TODO: step [WF2]
    target => $HTMLTargetAttrChecker,
    ## TODO: template
    type => $GetHTMLEnumeratedAttrChecker->({
      ## [HTML4]
      text => 1, password => 1, checkbox => 1, radio => 1, submit => 1,
      reset => 1, file => 1, hidden => 1, image => 1, button => 1,
      ## [WF2]
      ## The first keyword is |datetime|; it used to be misspelled
      ## |datatime|, which rejected the valid value and accepted the
      ## misspelling.
      datetime => 1, 'datetime-local' => 1, date => 1, month => 1, week => 1,
      time => 1, number => 1, range => 1, email => 1, url => 1,
      add => 1, remove => 1, 'move-up' => 1, 'move-down' => 1,
    }),
    usemap => $HTMLUsemapAttrChecker,
    ## NOTE: CDATA [M12N], so any value is accepted.
    ## TODO: "optional except when the type attribute has the value
    ## "radio" or "checkbox"" [HTML4]
    ## TODO: constraints [WF2]
    ## TODO: "authors should ensure that in each set of radio buttons
    ## that one is initially "on"." [HTML4] [WF2]
    value => sub {},
  }, {
    ## Attribute name => feature status bits.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_WF2 | FEATURE_M12N10_REC,
    'accept-charset' => FEATURE_HTML2X_RFC,
    accesskey => FEATURE_M12N10_REC,
    action => FEATURE_WF2,
    align => FEATURE_M12N10_REC_DEPRECATED,
    alt => FEATURE_M12N10_REC,
    autocomplete => FEATURE_WF2,
    autofocus => FEATURE_WF2,
    checked => FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    enctype => FEATURE_WF2,
    form => FEATURE_WF2,
    inputmode => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
    ismap => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    list => FEATURE_WF2,
    max => FEATURE_WF2,
    maxlength => FEATURE_WF2 | FEATURE_M12N10_REC,
    method => FEATURE_WF2,
    min => FEATURE_WF2,
    name => FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    readonly => FEATURE_WF2 | FEATURE_M12N10_REC,
    replace => FEATURE_WF2,
    required => FEATURE_WF2,
    sdapref => FEATURE_HTML20_RFC,
    size => FEATURE_WF2_DEPRECATED | FEATURE_M12N10_REC,
    src => FEATURE_M12N10_REC,
    step => FEATURE_WF2,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target => FEATURE_WF2,
    template => FEATURE_WF2,
    type => FEATURE_M12N10_REC,
    usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## Flags, in the per-element state, how the URI-valued attributes of
  ## this element are used: |action| as an action, |datasrc| as a
  ## resource and |src| as an embedded resource.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4012    
4013 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
4014    
## |button| element definition.  Content-model callbacks come from
## %HTMLProseContentChecker.
## NOTE: %Flow; - something [XHTML10]
## TODO: -A|%formctrl;|form|fieldset [HTML4]
## TODO: image map (img) in |button| is "illegal" [HTML4].
## TODO: Tests
## TODO: Tests for <nest/> in <button>
$Element->{$HTML_NS}->{button} = {
  %HTMLProseContentChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Attribute name => value checker.
    accesskey => $HTMLAccesskeyAttrChecker,
    action => $HTMLURIAttrChecker,
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    ## |enctype| is listed with a WF2 status below, and the |form| and
    ## |input| definitions validate it as an Internet media type, so the
    ## same checker is used here.
    enctype => $HTMLIMTAttrChecker,
    ## TODO: form [WF2]
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    name => sub {}, ## NOTE: CDATA [M12N]
    oninvalid => $HTMLEventHandlerAttrChecker,
    replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
    target => $HTMLTargetAttrChecker,
    ## TODO: template [WF2]
    type => $GetHTMLEnumeratedAttrChecker->({
      button => 1, submit => 1, reset => 1,
    }),
    value => sub {}, ## NOTE: CDATA [M12N]
  }, {
    ## Attribute name => feature status bits.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC,
    action => FEATURE_WF2,
    autofocus => FEATURE_WF2,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    enctype => FEATURE_WF2,
    form => FEATURE_WF2,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    method => FEATURE_WF2,
    name => FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninvalid => FEATURE_WF2,
    replace => FEATURE_WF2,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target => FEATURE_WF2,
    template => FEATURE_WF2,
    type => FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## Flags, in the per-element state, how the URI-valued attributes of
  ## this element are used: |action| as an action, |datasrc| as a
  ## resource.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
};
4072    
## |label| element definition.  Content-model callbacks come from
## %HTMLPhrasingContentChecker.
## NOTE: %Inline - label [XHTML10]
## TODO: -label
## TODO: At most one form control [WF2]
## TODO: Tests
## TODO: Tests for <nest/> in <label>
$Element->{$HTML_NS}{label} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      accesskey => $HTMLAccesskeyAttrChecker,
      ## TODO: IDREF
      ## TODO: Must be |id| of control [HTML4]
      ## TODO: Or, "may only contain one control element"
      for => $AttrCheckerNotImplemented,
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
      for => FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw(onblur onfocus)),
    },
  ),
};
4092    
## |select| element definition.  Content-model callbacks come from
## %HTMLProseContentChecker.
## TODO: (optgroup|option)* [HTML4] + [WF2]
## TODO: SHOULD avoid empty and visible [WF2]
## TODO: author should SELECTED at least one OPTION in non-MULTIPLE
## case [HTML4].
## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is
## "error" [HTML4]
## TODO: Tests
## TODO: Tests for <nest/> in <select>
$Element->{$HTML_NS}{select} = {
  %HTMLProseContentChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      accesskey => $HTMLAccesskeyAttrChecker,
      autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      ## TODO: form [WF2]
      multiple => $GetHTMLBooleanAttrChecker->('multiple'),
      name => sub {}, ## NOTE: CDATA [M12N]
      oninvalid => $HTMLEventHandlerAttrChecker,
      ## TODO: pattern [WF2]
      ## TODO: |title| semantics
      size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_WF2) }
           qw(accesskey autofocus data form oninvalid pattern)),
      (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
           qw(datafld dataformatas datasrc)),
      (map { ($_ => FEATURE_M12N10_REC) } qw(multiple name size)),
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw(onblur onchange onfocus tabindex)),
      (map { ($_ => FEATURE_HTML20_RFC) } qw(sdaform sdapref)),
      disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  ## Flags, in the per-element state, that both |data| and |datasrc|
  ## are URIs used as resources.
  check_start => sub {
    my ($self, $item, $state) = @_;

    $state->{uri_info}{data}{type}{resource} = 1;
    $state->{uri_info}{datasrc}{type}{resource} = 1;
  },
};
4143 wakaba 1.1
## |datalist| element definition.  Content-model callbacks come from
## %HTMLProseContentChecker.
## TODO: (transparent | option)*
## TODO: |option| child MUST be empty [WF2]
## TODO: Tests
## TODO: Tests for <nest/> in <datalist>
$Element->{$HTML_NS}{datalist} = {
  %HTMLProseContentChecker,
  status => FEATURE_WF2,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      data => FEATURE_WF2,
    },
  ),
  ## Flags, in the per-element state, that |data| is a URI used as a
  ## resource.
  check_start => sub {
    my ($self, $item, $state) = @_;

    $state->{uri_info}{data}{type}{resource} = 1;
  },
};
4162 wakaba 1.49
## |optgroup| element definition.  Content-model callbacks come from
## %HTMLProseContentChecker.
## TODO: (option|optgroup)* [HTML4] + [WF2] SHOULD avoid empty and
## visible [WF2]
## TODO: Tests
## TODO: Tests for <nest/> in <optgroup>
$Element->{$HTML_NS}{optgroup} = {
  %HTMLProseContentChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      ## NOTE: Text [M12N], so any value is accepted.
      ## TODO: required
      label => sub {},
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
      label => FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
4179    
## |option| element definition.  Content-model callbacks come from
## %HTMLTextChecker.
## TODO: Tests
## TODO: Tests for <nest/> in <option>
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Attribute name => value checker.
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub {}, ## NOTE: Text [M12N]
    selected => $GetHTMLBooleanAttrChecker->('selected'),
    value => sub {}, ## NOTE: CDATA [M12N]
  }, {
    ## Attribute name => feature status bits.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## The two status flags must be combined with |; a comma here would
    ## add a stray list element and shift every later key/value pair.
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    label => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    selected => FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
};
4202 wakaba 1.49
## |textarea| element definition.  Content-model callbacks come from
## %HTMLTextChecker.
## TODO: Tests
## TODO: Tests for <nest/> in <textarea>
$Element->{$HTML_NS}{textarea} = {
  %HTMLTextChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type
      accesskey => $HTMLAccesskeyAttrChecker,
      autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
      ## TODO: SHOULD if wrap=hard [WF2]
      cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      ## TODO: form [WF2]
      ## TODO: inputmode [WF2]
      maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name => sub {}, ## NOTE: CDATA [M12N]
      ## TODO: pattern [WF2]
      ## TODO: |title| special semantics
      readonly => $GetHTMLBooleanAttrChecker->('readonly'),
      required => $GetHTMLBooleanAttrChecker->('required'),
      rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      oninvalid => $HTMLEventHandlerAttrChecker,
      wrap => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw(soft hard)
      }),
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_WF2) }
           qw(accept autofocus form maxlength oninvalid pattern
              required wrap)),
      (map { ($_ => FEATURE_M12N10_REC) } qw(accesskey cols name rows)),
      (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
           qw(datafld dataformatas datasrc)),
      (map { ($_ => FEATURE_WF2 | FEATURE_M12N10_REC) }
           qw(disabled readonly)),
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw(onblur onchange onfocus onselect tabindex)),
      (map { ($_ => FEATURE_HTML20_RFC) } qw(sdaform sdapref)),
      'accept-charset' => FEATURE_HTML2X_RFC,
      inputmode => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  ## Flags, in the per-element state, that |data| is a URI used as a
  ## resource.
  ## NOTE(review): the tables above list |datasrc| but no |data|
  ## attribute for this element -- confirm which name is intended.
  check_start => sub {
    my ($self, $item, $state) = @_;

    $state->{uri_info}{data}{type}{resource} = 1;
  },
};
4261 wakaba 1.49
## |output| element definition.  Content-model callbacks come from
## %HTMLPhrasingContentChecker (Inline [WF2]).
## NOTE: "The output element should be used when ..." [WF2]
## TODO: Tests
## TODO: Tests for <nest/> in <output>
$Element->{$HTML_NS}{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_WF2,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.  No element-specific checker
    ## is defined yet.
    {
      ## TODO: for [WF2]
      ## TODO: form [WF2]
      ## TODO: name [WF2]
      ## onformchange[WF2]
      ## onforminput[WF2]
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_WF2) }
           qw(for form name onformchange onforminput)),
      onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    },
  ),
};
4284    
4285     ## TODO: repetition template
4286    
## |isindex| element definition.  Content-model callbacks come from
## %HTMLEmptyChecker.  The element status carries the deprecated
## flags ([HTML4]).
## TODO: Tests
## TODO: Tests for <nest/> in <isindex>
$Element->{$HTML_NS}{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      prompt => sub {}, ## NOTE: Text [M12N]
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw(class dir id title)),
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      prompt => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      style => FEATURE_XHTML10_REC,
    },
  ),
  ## Flags, in the per-element state, that |action| is a URI used as
  ## an action.
  ## NOTE(review): no |action| attribute appears in the tables above
  ## -- confirm that this registration is intended.
  check_start => sub {
    my ($self, $item, $state) = @_;

    $state->{uri_info}{action}{type}{action} = 1;
  },
};
4312 wakaba 1.49
## |script| element definition.  Defaults come from %HTMLChecker; all
## four content callbacks are overridden below.
## ISSUE: Significant check and text child node
$Element->{$HTML_NS}{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      src => $HTMLURIAttrChecker,
      defer => $GetHTMLBooleanAttrChecker->('defer'),
      async => $GetHTMLBooleanAttrChecker->('async'),
      type => $HTMLIMTAttrChecker,
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw(defer src type)),
      (map { ($_ => FEATURE_HTML4_REC_RESERVED) } qw(event for)),
      async => FEATURE_HTML5_DEFAULT,
      charset => FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      language => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  ## With a |src| attribute the element is marked as "must be empty".
  ## Otherwise the script type is derived from |type| and |language|
  ## and remembered in the element state.
  check_start => sub {
    my ($self, $item, $state) = @_;
    my $script_el = $item->{node};

    if ($script_el->has_attribute_ns (undef, 'src')) {
      $state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $script_el->get_attribute_ns (undef, 'type');
      my $language = $script_el->get_attribute_ns (undef, 'language');

      ## An empty |type| or an empty |language| selects the default.
      ## Otherwise |type| wins over |language|, and the default is
      ## used when neither attribute is specified.
      my $has_empty_hint = grep { defined $_ and $_ eq '' } $type, $language;
      $state->{script_type}
          = $has_empty_hint   ? 'text/javascript'
          : defined $type     ? $type
          : defined $language ? 'text/' . $language
          :                     'text/javascript';
      ## TODO: $type normalization
    }

    $state->{uri_info}{src}{type}{resource} = 1;
  },
  ## Child elements are errors when they are in the current "minus"
  ## set, or when the element must be empty and the child is not in
  ## the current "plus" set.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;

    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      #
    } else {
      if ($state->{must_be_empty}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      }
    }
  },
  ## Significant text is an error when the element must be empty.
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;

    if ($has_significant and $state->{must_be_empty}) {
      $self->{onerror}->(node => $text_node,
                         type => 'character not allowed');
    }
  },
  ## An inline script is reported as unsupported, tagged with its
  ## script type, before the generic end check runs.
  check_end => sub {
    my ($self, $item, $state) = @_;

    if (not $state->{must_be_empty}) {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'script:'.$state->{script_type});
      ## TODO: text/javascript support

      $HTMLChecker{check_end}->(@_);
    }
  },
};
4394 wakaba 1.1
## NOTE: This definition applies to |noscript| when script is disabled.
## Defaults come from %HTMLTransparentChecker.  While the |in_head|
## flag is set the element gets its own, stricter content model.
## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
$Element->{$HTML_NS}{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  ## Reports |noscript| in a non-HTML document.  Outside |head|, adds
  ## |noscript| itself to the "minus" set.
  check_start => sub {
    my ($self, $item, $state) = @_;
    my $noscript_el = $item->{node};

    if (not $noscript_el->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $noscript_el, type => 'in XML:noscript');
    }

    if (not $self->{flag}{in_head}) {
      $self->_add_minus_elements ($state, {$HTML_NS => {noscript => 1}});
    }
  },
  ## In |head| the only children accepted without an error are "plus"
  ## elements, |link|, |style| without |scoped|, and |meta| with an
  ## |http-equiv| other than "content-type".  Elsewhere the transparent
  ## checker decides.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;

    ## Shared error report for children not allowed in head noscript.
    my $reject = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:head noscript',
                         level => $self->{must_level});
    };

    if (not $self->{flag}{in_head}) {
      $HTMLTransparentChecker{check_child_element}->(@_);
    } elsif ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      $reject->() if $child_el->has_attribute_ns (undef, 'scoped');
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
      my $http_equiv
          = $child_el->get_attribute_node_ns (undef, 'http-equiv');
      ## TODO: case
      if (!$http_equiv || lc ($http_equiv->value) eq 'content-type') {
        $reject->();
      }
    } else {
      $reject->();
    }
  },
  ## In |head| significant text is an error; elsewhere the transparent
  ## checker decides.
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;

    if (not $self->{flag}{in_head}) {
      $HTMLTransparentChecker{check_child_text}->(@_);
    } elsif ($has_significant) {
      $self->{onerror}->(node => $text_node,
                         type => 'character not allowed');
    }
  },
  ## Drops the "minus" entries of this element, then delegates to the
  ## end check that matches the current position.
  check_end => sub {
    my ($self, $item, $state) = @_;

    $self->_remove_minus_elements ($state);

    my $finish = $self->{flag}{in_head}
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $finish->(@_);
  },
};
4482 wakaba 1.1
## |event-source| element definition.  Content-model callbacks come
## from %HTMLEmptyChecker.
$Element->{$HTML_NS}{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      src => $HTMLURIAttrChecker,
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      src => FEATURE_HTML5_LC,
    },
  ),
  ## Flags, in the per-element state, that |src| is a URI used as a
  ## resource.
  check_start => sub {
    my ($self, $item, $state) = @_;

    $state->{uri_info}{src}{type}{resource} = 1;
  },
};
4498    
## |details| element definition.  Defaults come from
## %HTMLProseContentChecker.  The callbacks below additionally require
## a |legend| child that is not preceded by other content.
## NOTE: legend, Prose
$Element->{$HTML_NS}{details} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => value checker.
    {
      open => $GetHTMLBooleanAttrChecker->('open'),
    },
    ## Attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      open => FEATURE_HTML5_WD,
    },
  ),
  ## Two flags are kept in the element state: |has_legend| (a |legend|
  ## child has been seen) and |has_non_legend| (some content has been
  ## seen, so a later |legend| is an error).
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;

    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      if ($state->{has_non_legend}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:details legend',
                           level => $self->{must_level});
      }
      ## A |legend| also counts as content for any later |legend|.
      $state->{has_legend} = 1;
      $state->{has_non_legend} = 1;
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $state->{has_non_legend} = 1 unless $child_is_transparent;
      ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
      ## is conforming?
    }
  },
  ## Significant text counts as content that precedes a later |legend|.
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;

    if ($has_significant) {
      $state->{has_non_legend} = 1;
    }
  },
  ## Reports a missing |legend|, then runs the prose end check.
  check_end => sub {
    my ($self, $item, $state) = @_;

    if (not $state->{has_legend}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<details><legend>aa</legend></details>| error?
  },
};
4553    
## |datagrid| element (HTML5 WD).
##
## Content model:
##   Prose -(text* table Prose*) | table | select | datalist | Empty
##
## The child checkers drive a small state machine stored in
## |$element_state->{phase}|:
##   'any'   - initial state set by |check_start|;
##   'prose' - significant text or a prose content element has been seen;
##   'none'  - a |table|, |select|, or |datalist| was seen while in 'any',
##             so no further element or significant text is accepted.
## |$element_state->{has_element}| records that a prose content element
## (as opposed to text only) has been seen; a |table| is rejected in the
## 'prose' phase as long as the flag is not set.
$Element->{$HTML_NS}{datagrid} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Per-attribute value checkers
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    ## Per-attribute feature status flags
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |a| and |datagrid| are handed to |_add_minus_elements|; children
    ## found in |$self->{minus_elements}| are reported by
    ## |check_child_element| below.
    my %minus = ($HTML_NS => {a => 1, datagrid => 1});
    $self->_add_minus_elements ($element_state, \%minus);
    $element_state->{phase} = 'any';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Shared reporter for the plain "not allowed here" error.
    my $not_allowed = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    };

    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } elsif ($element_state->{phase} eq 'prose') {
      my $is_prose = $HTMLProseContent->{$child_nsuri}{$child_ln};
      ## A |table| before any other prose element is the excluded
      ## "text* table Prose*" pattern.
      my $is_leading_table = (!$element_state->{has_element} &&
                              $child_nsuri eq $HTML_NS &&
                              $child_ln eq 'table');
      $not_allowed->() if !$is_prose or $is_leading_table;
      $element_state->{has_element} = 1;
    } elsif ($element_state->{phase} eq 'any') {
      my %is_sole_child = (table => 1, select => 1, datalist => 1);
      if ($child_nsuri eq $HTML_NS and $is_sole_child{$child_ln}) {
        $element_state->{phase} = 'none';
      } elsif ($HTMLProseContent->{$child_nsuri}{$child_ln}) {
        $element_state->{has_element} = 1;
        $element_state->{phase} = 'prose';
        ## TODO: transparent?
      } else {
        $not_allowed->();
      }
    } elsif ($element_state->{phase} eq 'none') {
      $not_allowed->();
    } else {
      die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only significant text affects the state machine.
    return unless $has_significant;

    my $phase = $element_state->{phase};
    if ($phase eq 'any') {
      $element_state->{phase} = 'prose';
    } elsif ($phase ne 'prose') {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## NOTE(review): the non-'none' case delegates to the *phrasing*
    ## checker's |check_end| although this element is built on the prose
    ## checker -- confirm that this is intended.
    my $finish = $element_state->{phase} eq 'none'
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $finish->(@_);
  },
  ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
  ## are not disallowed (assuming that form control contents are also
  ## prose content).
};
4643    
## |command| element (HTML5 WD): an empty element.
$Element->{$HTML_NS}{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Boolean attributes share the generic boolean checker.
    (map { ($_ => $GetHTMLBooleanAttrChecker->($_)) }
         qw(checked default disabled hidden)),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    type => sub {
      my ($self, $attr) = @_;
      ## The value must be exactly one of these keywords (the
      ## comparison is case-sensitive).
      my %is_command_type = (command => 1, checkbox => 1, radio => 1);
      $self->{onerror}->(node => $attr, type => 'attribute value not allowed')
          unless $is_command_type{$attr->value};
    },
  }, {
    %HTMLAttrStatus,
    ## Every |command|-specific attribute has the HTML5 WD status.
    (map { ($_ => FEATURE_HTML5_WD) }
         qw(checked default disabled hidden icon label radiogroup type)),
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Flag the |icon| URI as an embedded one in |uri_info|.
    $element_state->{uri_info}{icon}{type}{embedded} = 1;
  },
};
4679    
## |menu| element (HTML5 WD; also an HTML4 / XHTML m12n element).
##
## Content model: either |li| elements only, or phrasing content only.
## |$element_state->{phase}| tracks which alternative is in effect:
##   'li or phrasing' - initial state, nothing significant seen yet;
##   'li'             - an |li| child has been seen;
##   'phrasing'       - significant text or a phrasing element has been seen.
## While a |menu| is open, |$self->{flag}->{in_menu}| is true; the value
## found on entry is saved so that nested |menu| elements do not clear the
## flag of their ancestor.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    id => sub {
      ## NOTE: same as global |id=""|, with |$self->{menu}| registration
      my ($self, $attr) = @_;
      my $value = $attr->value;
      if (length $value > 0) {
        if ($self->{id}->{$value}) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @{$self->{id}->{$value}}, $attr;
        } else {
          $self->{id}->{$value} = [$attr];
        }
      } else {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      }
      if ($value =~ /[\x09-\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID');
      }
      ## The first |menu| carrying a given ID wins.
      $self->{menu}->{$value} ||= $attr;
      ## ISSUE: <menu id=""><p contextmenu=""> match?
    },
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_WD,
    ## NOTE: This key was misspelled |compat|, so the |compact| attribute
    ## checked above had no status entry of its own.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';
    ## Remember the outer value so |check_end| can restore it.
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        ## |li| after phrasing content
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        ## Phrasing content after |li|
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        ## Significant text after |li|
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Clear the flag only if this |menu| was the one that set it.
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
4782    
## |datatemplate| element (HTML5, at risk).
##
## Only |rule| children are accepted, apart from elements listed in
## |plus_elements|; significant text is an error.
$Element->{$HTML_NS}{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } elsif (not ($child_nsuri eq $HTML_NS and $child_ln eq 'rule')) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  is_xml_root => 1,
};
4810    
## |rule| element (HTML5, at risk).
##
## Children and text are not checked at all (see the NOTE at the end of
## this definition).  |nest| is handed to |_add_plus_elements| for the
## duration of the element.
$Element->{$HTML_NS}{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Per-attribute value checkers
    condition => $HTMLSelectorsAttrChecker,
    mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
  }, {
    ## Per-attribute feature status flags
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_AT_RISK) } qw(condition mode)),
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my %plus = ($HTML_NS => {nest => 1});
    $self->_add_plus_elements ($element_state, \%plus);
  },
  ## Deliberate no-ops: any child element or text is accepted.
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Undo |check_start| before running the generic end-of-element check.
    $self->_remove_plus_elements ($element_state);
    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
4837    
## |nest| element (HTML5, at risk): an empty element.
$Element->{$HTML_NS}{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    filter => $HTMLSelectorsAttrChecker,
    mode => sub {
      my ($self, $attr) = @_;
      ## The value must be a single non-empty token: one or more
      ## characters, none of them U+0009..U+000D or U+0020.
      unless ($attr->value =~ /\A[^\x09-\x0D\x20]+\z/) {
        $self->{onerror}->(node => $attr, type => 'mode:syntax error');
      }
    },
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_AT_RISK) } qw(filter mode)),
  }),
};
4856    
## |legend| element (HTML5 / XHTML m12n): phrasing content.
$Element->{$HTML_NS}{legend} = do {
  ## Per-attribute value checkers
  my %attr_checker = (
    accesskey => $HTMLAccesskeyAttrChecker,
    # align => $GetHTMLEnumeratedAttrChecker->({
    #   top => 1, bottom => 1, left => 1, right => 1,
    # }),
  );
  ## Per-attribute feature status flags
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );
  +{
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
};
4873    
## |div| element (HTML5 / XHTML m12n): prose content.
$Element->{$HTML_NS}{div} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |align| is checked as an enumerated attribute.
    align => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(left center right justify)
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    ## The data binding attributes all share the HTML4 "reserved" status.
    (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
         qw(datafld dataformatas datasrc)),
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Flag the |datasrc| URI as a resource one in |uri_info|.
    $element_state->{uri_info}{datasrc}{type}{resource} = 1;
  },
};
4896    
## |center| element (deprecated in XHTML m12n): prose content, no
## element-specific attribute checkers.
$Element->{$HTML_NS}{center} = do {
  ## Per-attribute feature status flags
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );
  +{
    %HTMLProseContentChecker,
    status => FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
4906    
## |font| element (HTML5 at risk; deprecated in XHTML m12n): transparent
## content model.  No element-specific attribute value checker is
## implemented yet.
$Element->{$HTML_NS}{font} = do {
  my $html5_or_m12n = FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC;
  ## Per-attribute feature status flags
  my %attr_status = (
    %HTMLAttrStatus,
    class => $html5_or_m12n,
    color => FEATURE_M12N10_REC_DEPRECATED,
    dir => $html5_or_m12n,
    face => FEATURE_M12N10_REC_DEPRECATED,
    id => $html5_or_m12n,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    size => FEATURE_M12N10_REC_DEPRECATED,
    style => FEATURE_HTML5_AT_RISK | FEATURE_XHTML10_REC,
    title => $html5_or_m12n,
  );
  +{
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_AT_RISK | FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->({
      ## TODO
    }, \%attr_status),
  };
};
4924 wakaba 1.49
## |basefont| element (deprecated in XHTML m12n): an empty element.
$Element->{$HTML_NS}{basefont} = do {
  ## Per-attribute feature status flags
  my %attr_status = (
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) } qw(color face size)),
    #id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
    id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  );
  +{
    %HTMLEmptyChecker,
    status => FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->({
      ## TODO: color, face, size
    }, \%attr_status),
  };
};
4939    
4940 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
4941     ## class title id cols rows onload onunload style(x10)
4942     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
4943     ## noframes Common, lang(xhtml10)
4944    
4945     ## TODO: CR: ruby rb rt rp rbc rtc @rbspan
4946 wakaba 1.56
4947 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
4948     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
4949     ## xmp, listing sdapref [HTML 2.0]
4950    
4951 wakaba 1.56 =pod
4952    
4953     WF2: Documents MUST comply with [CHARMOD].
4954     WF2: Vendor extensions MUST NOT be used.
4955    
4956 wakaba 1.61 HTML 2.0 nextid @n
4957    
4958     RFC 2659: CERTS CRYPTOPTS
4959    
4960     ISO-HTML: pre-html, divN
4961    
4962 wakaba 1.56 =cut
4963 wakaba 1.61
4964     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
4965     ## We added them only to |a|. |link| and |form| might also allow them
4966     ## in theory.
4967 wakaba 1.1
## Record that the definitions for the HTML namespace have been loaded
## (presumably consulted by the core checker before loading this module
## -- confirm against Whatpm::ContentChecker).
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

## A required file has to end with a true value.
1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24