/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.69 - (hide annotations) (download)
Sat Mar 22 08:23:04 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.68: +48 -5 lines
++ whatpm/t/ChangeLog	22 Mar 2008 08:19:58 -0000
	* content-model-2.dat: Test data on more HTML4 attributes
	are added.

2008-03-22  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	22 Mar 2008 08:22:41 -0000
	* HTML.pm: tr/@bgcolor, td/@bgcolor, th/@bgcolor, td/@abbr,
	th/@abbr, td/@axis, th/@axis, td/@scope, td/@nowrap,
	th/@nowrap, caption/@align , table/@frame, table/@rules,
	table/@summary, and table/@width are implemented.

2008-03-22  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
## Namespace URI of HTML elements.  It is used as the top-level key of
## the element classification tables defined in this file.
my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
## Feature status of the HTML5 |role| attribute ([HTML5ROLE]): the
## working draft status bit only.
sub FEATURE_HTML5_ROLE () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
  ## TODO: svg:*/@role
}
11    
## Feature status shorthands for HTML5 features.  Each of them is a
## maturity status bit combined with the FEATURE_ALLOWED bit.

## Last call status.
sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC |
  Whatpm::ContentChecker::FEATURE_ALLOWED
}

## NOTE: The four statuses below currently share the same value
## (working draft status with the FEATURE_ALLOWED bit).
sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD |
  Whatpm::ContentChecker::FEATURE_ALLOWED
}
sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD |
  Whatpm::ContentChecker::FEATURE_ALLOWED
}
sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD |
  Whatpm::ContentChecker::FEATURE_ALLOWED
}
sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD |
  Whatpm::ContentChecker::FEATURE_ALLOWED
}
## Status of a feature that is no longer part of HTML5: the working
## draft status bit without the FEATURE_ALLOWED bit.
sub FEATURE_HTML5_DROPPED () {
  ## NOTE: Was part of HTML5, but was dropped.
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
## Web Forms 2.0 feature: last call status with the FEATURE_ALLOWED
## bit.
sub FEATURE_WF2 () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC |
  Whatpm::ContentChecker::FEATURE_ALLOWED
}

## Deprecated Web Forms 2.0 feature: last call status only, without
## the FEATURE_ALLOWED bit.
sub FEATURE_WF2_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
  ## NOTE: MUST NOT be used.
}
44 wakaba 1.49
## NOTE: Metainformation Attributes Module by W3C XHTML2 WG.
## Last call status, without the FEATURE_ALLOWED bit.
sub FEATURE_RDFA_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
49 wakaba 1.58
50     ## NOTE: XHTML Role LCWD has almost no information on how the |role|
51     ## attribute can be used; the only requirement in that regard is:
52     ## "the attribute MUST be referenced using its namespace-qualified form"
53     ## (and this is a host language conformance requirement!).
54    
## XHTML Basic 1.1 feature: candidate recommendation status.
sub FEATURE_XHTMLBASIC11_CR () {
  ## NOTE: Only additions to M12N10_REC are marked.
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## XHTML Basic 1.1 feature that is additionally flagged with the
## FEATURE_DEPRECATED_INFO bit.
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR |
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO
}
63    
## NOTE: M12N10 status is based on its abstract module definition.
## It contains a number of problems.  (However, again, it's a REC!)
sub FEATURE_M12N10_REC () {
  ## NOTE: Oh, XHTML m12n 1.0 passed the CR phase!  W3C Process suck!
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}

## XHTML m12n 1.0 feature that is additionally flagged with the
## FEATURE_DEPRECATED_INFO bit.
sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC |
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO
}
74 wakaba 1.58 ## NOTE: XHTML M12N 1.1 is an LC at the time of writing and makes
75     ## no additions to 1.0.
76 wakaba 1.49
## NOTE: XHTML10 status is based on its transitional and frameset DTDs
## (second edition).  Only missing attributes from M12N10 abstract
## definition are added.
## NOTE(review): Despite the |_REC| suffix this returns the candidate
## recommendation status; presumably deliberate -- confirm.
sub FEATURE_XHTML10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
83    
## NOTE: Diff from HTML4.
sub FEATURE_ISOHTML_PREPARATION () { ## Informative documentation
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
88 wakaba 1.58
## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
## 4.01).  Only missing attributes from XHTML10 are added.
## Reserved HTML4 features are reported with the working draft status.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
94    
95     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
96     ## rather than presentational attributes (deprecated or not deprecated).
97 wakaba 1.48
## NOTE: Diff from HTML4.
## Candidate recommendation status with the FEATURE_DEPRECATED_SHOULD
## bit.
sub FEATURE_HTML32_REC_OBSOLETE () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR |
  Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD
  ## NOTE: Lowercase normative "should".
}
104    
## Feature defined by RFC 2659, reported with the candidate
## recommendation status.
sub FEATURE_RFC2659 () { ## Experimental RFC
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
108    
## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
sub FEATURE_HTML2X_RFC () { ## Proposed Standard, obsolete
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
113    
## NOTE: Diff from HTML 2.0.
sub FEATURE_RFC1942 () { ## Experimental RFC, obsolete
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
118    
## NOTE: Diff from HTML 3.2.
sub FEATURE_HTML20_RFC () { ## Proposed Standard, obsolete
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
123 wakaba 1.58
124 wakaba 1.29 ## December 2007 HTML5 Classification
125    
## Metadata content: {namespace URI => {local name => 1}}.
my $HTMLMetadataContent = {
  $HTML_NS => {
    title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1,
    'event-source' => 1, command => 1, datatemplate => 1,
    ## NOTE: A |meta| with no |name| attribute is not allowed as
    ## a metadata content other than in the |head| element.
    meta => 1,
    ## NOTE: Only when empty [WF2]
    form => 1,
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
140    
## Prose content: {namespace URI => {local name => 1}}.  The second
## group of HTML elements lists the same names as the HTML entries of
## |$HTMLPhrasingContent| except for |input| and |datalist|.
my $HTMLProseContent = {
  $HTML_NS => {
    section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
    h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
    footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1,
    ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1,
    details => 1, ## ISSUE: "Prose element" in spec.
    ## ISSUE: "Prose element" and "Interactive element" in the spec.
    ## (Listed once here; the key was previously given twice.)
    datagrid => 1,
    datatemplate => 1,
    div => 1, ## ISSUE: No category in spec.
    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    style => 1,

    br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
    dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
    var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
    b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
    command => 1, font => 1,
    a => 1,
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ins => 1, del => 1,

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, prose.
    menu => 1,

    img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
    canvas => 1,
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
179    
## Sectioning content: {namespace URI => {local name => 1}}.
my $HTMLSectioningContent = {
  $HTML_NS => {
    section => 1, nav => 1, article => 1, aside => 1,
    ## NOTE: |body| is only allowed in |html| element.
    body => 1,
  },
};
187    
## Sectioning roots: {namespace URI => {local name => 1}}.
my $HTMLSectioningRoot = {
  $HTML_NS => {
    blockquote => 1, datagrid => 1, figure => 1, td => 1,
  },
};
193    
## Heading content: {namespace URI => {local name => 1}}.
my $HTMLHeadingContent = {
  $HTML_NS => {
    h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
  },
};
199    
## Phrasing content: {namespace URI => {local name => 1}}.
my $HTMLPhrasingContent = {
  ## NOTE: All phrasing content is also prose content.
  $HTML_NS => {
    br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
    dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
    var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
    b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
    command => 1, font => 1,
    a => 1,
    datagrid => 1, ## ISSUE: "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    ins => 1, del => 1,

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, prose.
    menu => 1,

    img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
    canvas => 1,

    ## NOTE: WF2
    input => 1, ## NOTE: type=hidden
    datalist => 1, ## NOTE: block | where |select| allowed
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## NOTE: And non-inter-element-whitespace text nodes.
};
233    
234 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
235 wakaba 1.29
## Interactive content: {namespace URI => {local name => 1}}.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    a => 1,
    datagrid => 1, ## ISSUE: Categorized as "Interactive element"
  },
};
242    
243 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
244     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
245    
246     ## -- Common attribute syntax checkers
247    
248 wakaba 1.1 our $AttrChecker;
249    
## Returns a checker (code reference taking |($self, $attr)|) for an
## enumerated attribute.
##
## |$states| is a hash reference whose keys are the lowercase keywords
## and whose values are positive for conforming keywords and negative
## for known but non-conforming keywords.  The checker reports
## |enumerated:non-conforming| or |enumerated:invalid| through
## |$self->{onerror}|; nothing is reported for a conforming value.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    ## Keywords are compared ASCII case-insensitively.  |lc| is not
    ## used here since it also maps non-ASCII characters (e.g. U+212A
    ## KELVIN SIGN to "k") and would accept non-keywords.
    my $value = $attr->value;
    $value =~ tr/A-Z/a-z/;
    my $state = $states->{$value};
    if (not $state) {
      $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
    } elsif ($state > 0) {
      ## Conforming keyword
    } else {
      $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
    }
  };
}; # $GetHTMLEnumeratedAttrChecker
264    
## Returns a checker (code reference taking |($self, $attr)|) for a
## boolean attribute whose name is |$local_name|.  The value has to be
## either the empty string or exactly |$local_name|; otherwise
## |boolean:invalid| is reported through |$self->{onerror}|.
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value ne $local_name and $value ne '') {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid');
    }
  };
}; # $GetHTMLBooleanAttrChecker
275    
## Unordered set of space-separated tokens
##
## Splits the attribute value at the characters U+0009-U+000D and
## U+0020 and reports |duplicate token:<token>| through
## |$self->{onerror}| for every repeated occurrence of a token.
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($self, $attr) = @_;
  my %word;
  for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
    if ($word{$word}) {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
    } else {
      $word{$word} = 1;
    }
  }
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
288 wakaba 1.8
## |rel| attribute (unordered set of space separated tokens,
## whose allowed values are defined by the section on link types)
##
## Arguments:
##   $a_or_area     - Index into the |effect| array of a link type
##                    definition.  A true value also marks the |href|
##                    attribute as a hyperlink unconditionally.
##                    (Presumably 0 for |link| and 1 for |a| and
##                    |area| - confirm against the callers.)
##   $todo          - Hash reference; |has_hyperlink_link_type| is set
##                    if any of link types creates a hyperlink.
##   $self          - The checker object (|onerror|, |has_link_type|).
##   $attr          - The attribute node to check.
##   $item          - Not used by this checker.
##   $element_state - Hash reference; |uri_info|.|href|.|type| is
##                    updated according to the link types found.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;
  my %word;
  for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
    if (not $word{$word}) {
      $word{$word} = 1;
    } elsif ($word eq 'up') {
      ## NOTE: |up| is allowed to be repeated.
    } else {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
    }
  }
  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  my $is_hyperlink;
  my $is_resource;
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $word (keys %word) {
    my $def = $LinkType->{$word};
    unless (defined $def) {
      $self->{onerror}->(node => $attr, level => 'unsupported',
                         type => 'link type:'.$word);
      next;
    }

    ## The effect of the link type in the current context, if any.
    my $effect = $def->{effect}->[$a_or_area];

    my $status = $def->{status};
    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr, level => 's',
                           type => 'link type:proposed:'.$word);
      }
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context:'.$word);
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming:'.$word);
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$word}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate:'.$word);
      } else {
        $self->{has_link_type}->{$word} = 1;
      }
    }

    ## NOTE: |alternate| is handled after the loop, since its effect
    ## depends on whether |stylesheet| is also specified.
    if (defined $effect and $word ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback link,
  ## which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }
}; # $HTMLLinkTypesAttrChecker
387 wakaba 1.20
388     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
389 wakaba 1.1
## URI (or IRI)
##
## Checks the attribute value by |Whatpm::URIChecker| and reports
## problems as |URI::<type>[:<position>]| through |$self->{onerror}|.
## The per-attribute |uri_info| entry of |$element_state| is registered
## in |$self->{return}->{uri}| under the attribute value.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $value = $attr->value;
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    my %opt = @_;
    my $type = 'URI::'.$opt{type};
    $type .= ':'.$opt{position} if defined $opt{position};
    $self->{onerror}->(node => $attr, level => $opt{level}, type => $type);
  });
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $uri_info = ($element_state->{uri_info}->{$attr->name} ||= {});
  $uri_info->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []}, $uri_info;
}; # $HTMLURIAttrChecker
409    
## A space separated list of one or more URIs (or IRIs)
##
## Every item of the list is checked by |Whatpm::URIChecker|; problems
## are reported as |URIs::<type>:<item index>[:<position>]| through
## |$self->{onerror}|.  Each item is also registered in
## |$self->{return}->{uri}| with a type derived from the attribute
## name (|ping|, |profile|, or |archive|).
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  my $type = {ping => 'action',
              profile => 'namespace',
              archive => 'resource'}->{$attr->name};

  my $i = 0;
  for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
    Whatpm::URIChecker->check_iri_reference ($value, sub {
      my %opt = @_;
      my $error_type = 'URIs:'.':'.$opt{type}.':'.$i;
      $error_type .= ':'.$opt{position} if defined $opt{position};
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => $error_type);
    });

    ## TODO: absolute
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {$type => 1}};

    $i++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
441    
## Checks that the attribute value is a date and time with a time zone
## (|Z| or |+hh:mm| / |-hh:mm|).  A value that does not match the
## syntax is reported as |datetime:syntax error|; out-of-range
## components are reported as |datetime:bad <component>| through
## |$self->{onerror}|.
my $HTMLDatetimeAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: "space", not "space character" (in parsing algorithm, "space character")
  if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
    ## NOTE: $s is undefined if seconds are omitted; $zh and $zm are
    ## undefined if the time zone is |Z|.  The fraction part ($7) is
    ## not examined.
    my ($y, $M, $d, $h, $m, $s, $zh, $zm) = ($1, $2, $3, $4, $5, $6, $8, $9);
    my $report = sub {
      $self->{onerror}->(node => $attr, type => $_[0]);
    };

    if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
      ## The largest day number of each month, indexed by the month.
      my @last_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
      $report->('datetime:bad day') if $d < 1 or $d > $last_day[$M];

      ## February 29 exists only in leap years.
      my $is_leap_year = ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
      $report->('datetime:bad day')
          if $M == 2 and $d == 29 and not $is_leap_year;
    } else {
      $report->('datetime:bad month');
    }
    $report->('datetime:bad hour') if $h > 23;
    $report->('datetime:bad minute') if $m > 59;
    $report->('datetime:bad second') if defined $s and $s > 59;
    $report->('datetime:bad timezone hour') if $zh > 23;
    $report->('datetime:bad timezone minute') if $zm > 59;
    ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
  } else {
    $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
  }
}; # $HTMLDatetimeAttrChecker
472    
## Checks that the attribute value is an integer, i.e. one or more
## ASCII digits optionally preceded by "-".  Otherwise
## |integer:syntax error| is reported through |$self->{onerror}|.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A-?[0-9]+\z/) {
    $self->{onerror}->(node => $attr, type => 'integer:syntax error');
  }
}; # $HTMLIntegerAttrChecker
480    
## Returns a checker (code reference taking |($self, $attr)|) for an
## attribute whose value is a non-negative integer.
##
## |$range_check| is a code reference that receives the numeric value
## and returns whether it is in the allowed range.  The checker
## reports |nninteger:syntax error| if the value is not a sequence of
## ASCII digits and |nninteger:out of range| if |$range_check| returns
## false.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value !~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'nninteger:syntax error');
    } elsif (not $range_check->($value + 0)) {
      $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
    }
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
496    
## Returns a checker (code reference taking |($self, $attr)|) for an
## attribute whose value is a floating point number.
##
## |$range_check| is a code reference that receives the numeric value
## and returns whether it is in the allowed range.  The checker
## reports |float:syntax error| for a malformed value and
## |float:out of range| if |$range_check| returns false.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## An optional "-", followed by digits with at most one "."
    ## before, between, or after them.  NOTE: A value with two or
    ## more "."s (e.g. |1.2.3|) is not a number and is rejected.
    if ($value =~ /\A-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)\z/) {
      unless ($range_check->($value + 0)) {
        $self->{onerror}->(node => $attr, type => 'float:out of range');
      }
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error');
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
512    
513     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
514     ## ISSUE: RFC 2046 does not define syntax of media types.
515     ## ISSUE: The definition of "a valid MIME type" is unknown.
516     ## Syntactical correctness?
## Checks that the attribute value is an Internet media type,
## optionally with parameters.  The value is split into the type, the
## subtype, and name/value pairs of parameters, which are then passed
## to |Whatpm::IMTChecker->check_imt|.  Problems found by it are
## reported as |IMT:<type>|; a value that cannot be parsed is reported
## as |IMT:syntax error|.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
    my @type = ($1, $2);
    my $param = $3;
    while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
      ## $2 is set if the value is a token; $3 is set if the value is
      ## a quoted-string (including the enclosing quotation marks).
      my ($name, $tok, $quoted) = ($1, $2, $3);
      if (defined $tok) {
        push @type, $name => $tok;
      } else {
        ## Remove the enclosing quotation marks, then unescape
        ## quoted-pairs.
        my $v = substr $quoted, 1, (length $quoted) - 2;
        $v =~ s/\\(.)/$1/gs;
        push @type, $name => $v;
      }
    }
    require Whatpm::IMTChecker;
    Whatpm::IMTChecker->check_imt (sub {
      my %opt = @_;
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => 'IMT:'.$opt{type});
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
  }
}; # $HTMLIMTAttrChecker
549    
## Checks that the attribute value is a language tag, by
## |Whatpm::LangTag->check_rfc3066_language_tag|.  Problems are
## reported as |LangTag:<type>[:<subtag>]| through |$self->{onerror}|.
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag ($attr->value, sub {
    my %opt = @_;
    my $type = 'LangTag:'.$opt{type};
    $type .= ':' . $opt{subtag} if defined $opt{subtag};
    $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
                       level => $opt{level});
  });
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
567    
568     ## "A valid media query [MQ]"
## Media queries are not checked at all; the attribute is always
## reported as |media query| with the |unsupported| level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  $self->{onerror}->(node => $attr, level => 'unsupported',
                     type => 'media query');
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
575    
## Event handler attributes are not checked at all; the attribute is
## always reported as |event handler| with the |unsupported| level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  $self->{onerror}->(node => $attr, level => 'unsupported',
                     type => 'event handler');
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
586    
## Checks the syntax of a hashed ID reference.  If the value begins
## with "#", the rest of the value is recorded in |$self->{usemap}|
## together with the attribute node; otherwise
## |#idref:syntax error| is reported.  Whether the referenced |map|
## element exists is not examined here.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  ## MUST be a valid hashed ID reference to a |map| element
  my $value = $attr->value;
  if ($value =~ s/^#//) {
    ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
    push @{$self->{usemap}}, [$value => $attr];
  } else {
    $self->{onerror}->(node => $attr, type => '#idref:syntax error');
  }
  ## NOTE: Space characters in hashed ID references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
}; # $HTMLUsemapAttrChecker
600    
## Checks a browsing context name or keyword.  A value beginning with
## "_" has to be one of |_self|, |_parent|, and |_top| (compared
## case-insensitively); otherwise |reserved browsing context name| is
## reported.  Any other value is accepted.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    $value =~ tr/A-Z/a-z/; ## ISSUE: ASCII case-insensitive?
    unless ({
      _self => 1, _parent => 1, _top => 1,
    }->{$value}) {
      $self->{onerror}->(node => $attr,
                         type => 'reserved browsing context name');
    }
  } else {
    ## NOTE: An empty string is a valid browsing context name (same as _self).
  }
}; # $HTMLTargetAttrChecker
616    
## Checks that the attribute value is a group of selectors by parsing
## it with |Whatpm::CSS::SelectorsParser|.  Errors raised by the
## parser are forwarded to |$self->{onerror}| with their |type|
## prefixed by |selectors:| and |node| set to the attribute.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  require Whatpm::CSS::SelectorsParser;
  my $p = Whatpm::CSS::SelectorsParser->new;

  ## Pseudo-classes and pseudo-elements enabled in the parser.
  $p->{pseudo_class}->{$_} = 1 for qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;

  $p->{pseudo_element}->{$_} = 1 for qw/
    after before first-letter first-line
  /;

  $p->{must_level} = $self->{must_level};
  $p->{onerror} = sub {
    my %opt = @_;
    $opt{type} = 'selectors:'.$opt{type};
    $self->{onerror}->(%opt, node => $attr);
  };
  $p->parse_string ($attr->value);
}; # $HTMLSelectorsAttrChecker
646    
## Checks that the attribute value consists of exactly one character.
## Otherwise |char:syntax error| is reported with the level
## |$self->{fact_level}|.
my $HTMLAccesskeyAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: "character" or |%Character;| in HTML4.

  unless (length $attr->value == 1) {
    $self->{onerror}->(node => $attr, type => 'char:syntax error',
                       level => $self->{fact_level}); ## TODO: type
  }

  ## NOTE: "Note. Authors should consider the input method of the expected
  ## reader when specifying an accesskey." [HTML4] This is hard to implement,
  ## since it depends on keyboard and so on.
  ## NOTE: "We recommend that authors include the access key in label text
  ## or wherever the access key is to apply." [HTML4] (informative)
}; # $HTMLAccesskeyAttrChecker
664    
## Checks that the attribute value is either "#" followed by one or
## more hexadecimal digits or one of the sixteen color names listed in
## the pattern below (compared case-insensitively).  Otherwise
## |color:syntax error| is reported with the level
## |$self->{fact_level}|.
## NOTE(review): The number of hexadecimal digits is not restricted;
## confirm whether exactly six digits should be required.
my $HTMLColorAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: HTML4 "color" or |%Color;|

  my $value = $attr->value;

  unless ($value =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i) {
    $self->{onerror}->(node => $attr, type => 'color:syntax error', ## TODO: type
                       level => $self->{fact_level});
  }

  ## TODO: HTML4 has some guideline on usage of color.
}; # $HTMLColorAttrChecker
679    
## Checkers for attributes listed here by local name.  Each value is a
## code reference invoked with |($self, $attr)|; problems are reported
## through |$self->{onerror}|.
my $HTMLAttrChecker = {
  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]
  id => sub {
    ## NOTE: |map| has its own variant of |id=""| checker
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if (length $value > 0) {
      ## Every attribute node sharing the ID is remembered in
      ## |$self->{id}|; the second and later ones are errors.
      if ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID');
        push @{$self->{id}->{$value}}, $attr;
      } else {
        $self->{id}->{$value} = [$attr];
      }
      if ($value =~ /[\x09-\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID');
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value');
    }
  },
  title => sub {}, ## NOTE: No conformance criteria
  lang => sub {
    my ($self, $attr) = @_;
    ## NOTE: The empty string is allowed; any other value has to be
    ## a language tag.
    if ($attr->value ne '') {
      $HTMLLanguageTagAttrChecker->($self, $attr);
    }
    ## ISSUE: RFC 4646 (3066bis)?
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:lang');
    }

    ## TODO: test data
  },
  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
  class => sub {
    my ($self, $attr) = @_;
    my %word;
    for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
      if ($word{$word}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
      } else {
        $word{$word} = 1;
        ## Record where each class name is used.
        push @{$self->{return}->{class}->{$word}||=[]}, $attr;
      }
    }
  },
  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),
  contextmenu => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## The reference is only recorded here.
    push @{$self->{contextmenu}}, [$value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ## TODO: repeat, repeat-start, repeat-min, repeat-max, repeat-template ## TODO: global
  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
  tabindex => $HTMLIntegerAttrChecker
  ## TODO: ref, template, registrationmark
};
755    
## Feature status of each attribute listed here, keyed by attribute
## local name.
my %HTMLAttrStatus = (
  class => FEATURE_HTML5_DEFAULT,
  contenteditable => FEATURE_HTML5_DEFAULT,
  contextmenu => FEATURE_HTML5_WD,
  dir => FEATURE_HTML5_DEFAULT,
  draggable => FEATURE_HTML5_LC,
  id => FEATURE_HTML5_DEFAULT,
  irrelevant => FEATURE_HTML5_WD,
  lang => FEATURE_HTML5_DEFAULT,
  ref => FEATURE_HTML5_AT_RISK,
  registrationmark => FEATURE_HTML5_AT_RISK,
  repeat => FEATURE_WF2,
  'repeat-max' => FEATURE_WF2,
  'repeat-min' => FEATURE_WF2,
  'repeat-start' => FEATURE_WF2,
  'repeat-template' => FEATURE_WF2,
  role => FEATURE_HTML5_ROLE,
  tabindex => FEATURE_HTML5_DEFAULT,
  template => FEATURE_HTML5_AT_RISK,
  title => FEATURE_HTML5_DEFAULT,
);
777    
## Status of the attributes in the "common" attribute collection,
## keyed by attribute local name.  Entries are grouped by status value.
my %HTMLM12NCommonAttrStatus = (
  ## RDFa attributes.
  (map { ($_ => FEATURE_RDFA_LC) } qw/
    about content datatype instanceof property rel resource rev
  /),
  ## Attributes with both the default HTML5 status and the M12N status.
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
    class dir id title
    onclick ondblclick
    onmousedown onmouseup onmouseover onmousemove onmouseout
    onkeypress onkeydown onkeyup
  /),
  ## |style| additionally carries the XHTML Basic 1.1 "deprecated" status.
  style => FEATURE_HTML5_DEFAULT
      | FEATURE_XHTMLBASIC11_CR_DEPRECATED
      | FEATURE_M12N10_REC,
);
804    
## Register the event handler content attributes: each of them shares
## the same checker and has the default HTML5 status.
foreach my $event_attr (qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onsubmit onunload
/) {
  $HTMLAttrStatus{$event_attr} = FEATURE_HTML5_DEFAULT;
  $HTMLAttrChecker->{$event_attr} = $HTMLEventHandlerAttrChecker;
}
816    
## Build the |check_attrs| code for one kind of HTML element.
##
## Arguments:
##   1. Hash reference: attribute local name => element-specific
##      checker code reference.
##   2. Hash reference: attribute local name => status value.
##
## Returns a code reference invoked as ($self, $item, $element_state)
## that walks every attribute of |$item->{node}|.
my $GetHTMLAttrsChecker = sub {
  my ($local_checker, $local_status) = @_;
  return sub {
    my ($self, $item, $element_state) = @_;
    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;
      my $in_null_ns = $ns eq '';

      ## For null-namespace attributes the element-specific checker
      ## wins over the global HTML one; the generic per-namespace
      ## checkers are consulted only when neither exists.
      my $checker = $in_null_ns
          ? ($local_checker->{$ln} || $HTMLAttrChecker->{$ln})
          : undef;
      $checker ||= $AttrChecker->{$ns}->{$ln} || $AttrChecker->{$ns}->{''};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif (not $in_null_ns or $local_status->{$ln}) {
        ## Either a namespaced attribute nobody knows how to check, or a
        ## null-namespace attribute with a status but no checker.
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }
      ## A null-namespace attribute with neither checker nor status
      ## raises no error above; the status call below still sees it.

      $self->_attr_status_info ($attr, $local_status->{$ln}) if $in_null_ns;
      ## TODO: global attribute
    }
  };
}; # $GetHTMLAttrsChecker
849    
## Base checker for HTML elements: everything from the generic
## |AnyChecker|, except that attributes are checked against the global
## HTML attribute status table with no element-specific checkers.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {},                 # no element-specific attribute checkers
    \%HTMLAttrStatus,   # global attribute status
  ),
);
854    
## Checker for elements with an empty content model: any child element
## not listed in |plus_elements|, and any significant text, is an error.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## |minus_elements| is consulted first; |plus_elements| is looked at
    ## only when the child is not listed there.
    my $error_type
        = $self->{minus_elements}->{$child_nsuri}->{$child_ln}
            ? 'element not allowed:minus'
        : $self->{plus_elements}->{$child_nsuri}->{$child_ln}
            ? undef
        : 'element not allowed:empty';
    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{must_level});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{must_level})
        if $has_significant;
  },
);
881    
## Checker for elements whose content model is text only: child
## elements are errors unless listed in |plus_elements|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## NOTE: Unlike the |minus| case, no |level| is given here.
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
);
898    
## TODO: Rename as "FlowContent" (HTML5 revision 1261)

## Checker for elements whose content model is prose content.
##
## |$element_state->{has_non_style}| records that something other than
## a |style| element has already been seen, because a |style| child is
## reported unless it has a |scoped| attribute and nothing else has
## been seen before it.
my %HTMLProseContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## All errors raised here are MUST-level and differ only in type.
    my $report = sub {
      my $error_type = shift;
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{must_level});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $report->('element not allowed:minus');
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      my $misplaced = $element_state->{has_non_style}
          || ! $child_el->has_attribute_ns (undef, 'scoped');
      $report->('element not allowed:prose style') if $misplaced;
    } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
      unless ($child_is_transparent) {
        $element_state->{has_non_style} = 1;
      }
    } else {
      $element_state->{has_non_style} = 1;
      $report->('element not allowed:prose');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate the fact to the real parent's state.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
);
946    
## Checker for elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## The lookups are short-circuited in this order: |minus_elements|,
    ## |plus_elements|, then the phrasing content table.
    my $error_type
        = $self->{minus_elements}->{$child_nsuri}->{$child_ln}
            ? 'element not allowed:minus'
        : $self->{plus_elements}->{$child_nsuri}->{$child_ln}
            ? undef
        : $HTMLPhrasingContent->{$child_nsuri}->{$child_ln}
            ? undef
        : 'element not allowed:phrasing';
    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{must_level});
    }
  },
  ## The "significant content" check is shared with prose content.
  check_end => $HTMLProseContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between prose and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
971    
## Transparent elements are currently checked exactly like prose content.
## ISSUE: Should the significant content rule be applied to a
## transparent element with a parent?
my %HTMLTransparentChecker = (%HTMLProseContentChecker);
975 wakaba 1.40
our ($Element, $ElementDefault);

## Definition registered under the empty local name for the HTML
## namespace: only the base HTML checks.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
982    
## The |html| element: exactly one |head| followed by exactly one |body|.
## |$element_state->{phase}| tracks how far through that sequence the
## children have got: 'before head', 'after head' or 'after body'.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    xmlns => sub {
      my ($self, $attr) = @_;
      my $ns_value = $attr->value;
      $self->{onerror}->(node => $attr, type => 'invalid attribute value')
          if $ns_value ne $HTML_NS;
      if (not $attr->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
        ## TODO: Test
      }

      ## TODO: Should be resolved?
      my $uri_uses = ($self->{return}->{uri}->{$ns_value} ||= []);
      push @$uri_uses, {node => $attr, type => {namespace => 1}};
    },
    version => sub {
      ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
      ## Though DTDs of various versions of HTML define the attribute
      ## as |#FIXED|, this conformance checker does no check for
      ## the attribute value, since what kind of check should be done
      ## is unknown.
    },
  }, {
    %HTMLAttrStatus,
    class    => FEATURE_HTML5_DEFAULT | FEATURE_HTML2X_RFC,
    dir      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id       => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    manifest => FEATURE_HTML5_DEFAULT,
    sdaform  => FEATURE_HTML20_RFC,
    version  => FEATURE_M12N10_REC,
    xmlns    => FEATURE_HTML5_DEFAULT,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';
    $element_state->{uri_info}->{manifest}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## phase => { HTML child local name => next phase }.  A child with
    ## no entry for the current phase is not allowed there.
    my %transition = (
      'before head' => {head => 'after head', body => 'after body'},
      'after head'  => {body => 'after body'},
      'after body'  => {},
    );
    my $allowed = $transition{$element_state->{phase}}
        or die "check_child_element: Bad |html| phase: $element_state->{phase}";

    ## Only children in the HTML namespace can match a transition.
    my $html_child = $child_nsuri eq $HTML_NS ? $child_ln : '';
    my $next_phase = $allowed->{$html_child};
    if (defined $next_phase) {
      if ($element_state->{phase} eq 'before head' and $html_child eq 'body') {
        ## |body| arrived although no |head| has been seen.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing:head');
      }
      $element_state->{phase} = $next_phase;
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## phase at the end tag => errors for the children never seen.
    my %missing_in = (
      'after body'  => [],
      'before head' => ['child element missing:head',
                        'child element missing:body'],
      'after head'  => ['child element missing:body'],
    );
    my $missing = $missing_in{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";
    for my $error_type (@$missing) {
      $self->{onerror}->(node => $item->{node}, type => $error_type);
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1086 wakaba 1.25
## The |head| element: metadata content with exactly one |title|.
##
## While the children of |head| are being processed,
## |$self->{flag}->{in_head}| is set; the value it had before is kept in
## |$element_state->{in_head_original}| and put back by |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_DEFAULT | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is allowed.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{must_level});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## A |style| element with |scoped| is not allowed in |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{must_level});
    }

    ## Save the outer value of the |in_head| flag only once.  Saving it
    ## for every child would, from the second child on, store the value
    ## 1 set below, and |check_end| would then leave the flag set after
    ## the |head| element has ended.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:title');
    }
    ## Restore the flag value seen before the first child was processed
    ## (undefined when the element had no child element at all).
    $self->{flag}->{in_head} = $element_state->{in_head_original};

    $HTMLChecker{check_end}->(@_);
  },
};
1156    
## The |title| element: text-only content, global attributes only.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},   # no element-specific attribute checkers
    {
      %HTMLAttrStatus,
      class   => FEATURE_HTML5_DEFAULT | FEATURE_HTML2X_RFC,
      dir     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id      => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
1169 wakaba 1.1
## The |base| element: empty content; at most one per document; at
## least one of |href| and |target| must be present.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    ## Only the first |base| element seen by this checker is allowed.
    if (not $self->{has_base}) {
      $self->{has_base} = 1;
    } else {
      $self->{onerror}->(node => $base_el,
                         type => 'element not allowed:base');
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $self->{onerror}->(node => $base_el,
                         type => 'basehref after URI attribute');
    }
    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $self->{onerror}->(node => $base_el,
                         type => 'basetarget after hyperlink');
    }

    unless ($has_href or $has_target) {
      $self->{onerror}->(node => $base_el,
                         type => 'attribute missing:href|target');
    }

    $element_state->{uri_info}->{href}->{type}->{base} = 1;

    ## The individual attributes are checked by the generic HTML
    ## attribute checker, built here on each call.
    my $check_attrs = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    });
    return $check_attrs->($self, $item, $element_state);
  },
};
1230    
## The |link| element: empty content; |href| and |rel| are required.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $link_el = $item->{node};

    ## The checker is built per call because the |rel| checker closes
    ## over |$item|.
    my $check_attrs = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      ## The entries below override the two tables above.
      charset  => FEATURE_M12N10_REC,
      href     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      methods  => FEATURE_HTML20_RFC,
      rel      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rev      => FEATURE_M12N10_REC,
      sdapref  => FEATURE_HTML20_RFC,
      target   => FEATURE_M12N10_REC,
      type     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      urn      => FEATURE_HTML20_RFC,
    });
    $check_attrs->($self, $item, $element_state);

    if (not $link_el->has_attribute_ns (undef, 'href')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing:href');
    } elsif ($item->{has_hyperlink_link_type}) {
      ## A |link| with |href| and a hyperlink link type counts as a
      ## hyperlink element (consulted by the |base| checker).
      $self->{has_hyperlink_element} = 1;
    }

    if (not $link_el->has_attribute_ns (undef, 'rel')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing:rel');
    }
  },
};
1272    
## The |meta| element: empty content.
##
## |check_attrs| works in three steps:
##   1. Check each attribute, remembering the |name|, |http-equiv|,
##      |charset| and |content| attribute nodes.
##   2. Check which combinations of those four attributes are present.
##   3. For character encoding declarations (|charset|, or
##      |http-equiv=content-type|), check the position of the element
##      and the declared charset name.
$Element->{$HTML_NS}->{meta} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $name_attr;
    my $http_equiv_attr;
    my $charset_attr;
    my $content_attr;

    ## Attribute status table.  It does not depend on the attribute
    ## being checked, so it is built once per element rather than once
    ## per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      charset => FEATURE_HTML5_DEFAULT,
      content => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      'http-equiv' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scheme => FEATURE_M12N10_REC,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      ## |$checker| is a code reference, or the plain true value 1 for
      ## the attributes that are only remembered here and examined
      ## after the loop.
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'content') {
          $content_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'name') {
          $name_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'http-equiv') {
          $http_equiv_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'charset') {
          $charset_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'scheme') {
          $checker = sub {};
          ## NOTE: According to HTML4, values for the |scheme| attribute
          ## depend on |name| attribute and |profile| of |head|.  Otherwise
          ## it is "cdata".  The only profile with any scheme value defined
          ## is <http://dublincore.org/documents/dcq-html/> (and those
          ## that reference that profile; see
          ## <http://suika.fam.cx/gate/2005/sw/scheme#anchor-55> for more
          ## information).
          ## TODO: Should we implement the checking against the profile above?
          ## (But we don't want to implement its namespace bits.  They are
          ## poorly designed and obsolete in favor of HTML5's new ecosystem.)
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{''};
        }
      } else {
        $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{''};
      }

      my $status = $attr_status->{$attr_ln};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status);
      }
    }

    ## Allowed combinations: |name| + |content|, |http-equiv| +
    ## |content|, or |charset| alone.
    if (defined $name_attr) {
      if (defined $http_equiv_attr) {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'attribute not allowed');
      } elsif (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      ## NOTE: The metadata name and value are collected but not used
      ## yet (see "TODO: metadata conformance" below).
      my $metadata_name = $name_attr->value;
      my $metadata_value;
      if (defined $content_attr) {
        $metadata_value = $content_attr->value;
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
        $metadata_value = '';
      }
    } elsif (defined $http_equiv_attr) {
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $charset_attr) {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
      }
    } else {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv');
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv|charset');
      }
    }

    ## Check the position of a character encoding declaration: it must
    ## be the first child element of the document's |head|, and the
    ## document must be an HTML document.
    my $check_charset_decl = sub {
      my $parent = $item->{node}->manakai_parent_element;
      if ($parent and $parent eq $parent->owner_document->manakai_head) {
        for my $el (@{$parent->child_nodes}) {
          next unless $el->node_type == 1; # ELEMENT_NODE
          unless ($el eq $item->{node}) {
            ## NOTE: Not the first child element.
            $self->{onerror}->(node => $item->{node},
                               type => 'element not allowed:meta charset',
                               level => $self->{must_level});
          }
          last;
          ## NOTE: Entity references are not supported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'element not allowed:meta charset',
                           level => $self->{must_level});
      }

      unless ($item->{node}->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $item->{node},
                           type => 'in XML:charset',
                           level => $self->{must_level});
      }
    }; # $check_charset_decl

    ## Check a declared charset name.  Arguments: the attribute node the
    ## errors are reported on, and the charset name taken from it.
    my $check_charset = sub {
      my ($attr, $charset_value) = @_;
      ## NOTE: Though the case-sensitivity of |charset| attribute value
      ## is not explicitly spelled in the HTML5 spec, the Character Set
      ## registry of IANA, which is referenced from HTML5 spec, says that
      ## charset name is case-insensitive.
      $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

      require Message::Charset::Info;
      my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
      my $ic = $item->{node}->owner_document->input_encoding;
      if (defined $ic) {
        ## TODO: Test for this case
        my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
        ## NOTE(review): This compares the two table entries as strings.
        ## If both names are missing from the table, both sides are
        ## undefined and no mismatch is reported -- confirm whether that
        ## is intended.
        if ($charset ne $ic_charset) {
          $self->{onerror}->(node => $attr,
                             type => 'mismatched charset name:'.$ic.
                                 ':'.$charset_value, ## TODO: This should be a |value| value.
                             level => $self->{must_level});
        }
      } else {
        ## NOTE: MUST, but not checkable, since the document is not originally
        ## in serialized form (or the parser does not preserve the input
        ## encoding information).
        $self->{onerror}->(node => $attr,
                           type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
                           level => 'unsupported');
      }

      ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
      ## Syntactically valid and registered?  What about x-charset names?
      unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
                  ($charset_value)) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:syntax error:'.$charset_value, ## TODO
                           level => $self->{must_level});
      }

      if ($charset) {
        ## ISSUE: What is "the preferred name for that encoding" (for a charset
        ## with no "preferred MIME name" label)?
        my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
        if (($charset_status &
             Message::Charset::Info::PREFERRED_CHARSET_NAME ())
                != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
          $self->{onerror}->(node => $attr,
                             type => 'charset:not preferred:'.
                                 $charset_value, ## TODO
                             level => $self->{must_level});
        }
        if (($charset_status &
             Message::Charset::Info::REGISTERED_CHARSET_NAME ())
                != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
          if ($charset_value =~ /^x-/) {
            $self->{onerror}->(node => $attr,
                               type => 'charset:private:'.$charset_value, ## TODO
                               level => $self->{good_level});
          } else {
            $self->{onerror}->(node => $attr,
                               type => 'charset:not registered:'.
                                   $charset_value, ## TODO
                               level => $self->{good_level});
          }
        }
      } elsif ($charset_value =~ /^x-/) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:private:'.$charset_value, ## TODO
                           level => $self->{good_level});
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'charset:not registered:'.$charset_value, ## TODO
                           level => $self->{good_level});
      }

      if ($attr->get_user_data ('manakai_has_reference')) {
        $self->{onerror}->(node => $attr,
                           type => 'character reference in charset',
                           level => $self->{must_level});
      }
    }; # $check_charset

    ## TODO: metadata conformance

    ## TODO: pragma conformance
    if (defined $http_equiv_attr) { ## An enumerated attribute
      my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
      if ({
        'refresh' => 1,
        'default-style' => 1,
      }->{$keyword}) {
        #

        ## TODO: More than one occurrence is a MUST-level error (revision 1180).
      } elsif ($keyword eq 'content-type') {
        ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.

        $check_charset_decl->();
        if ($content_attr) {
          my $content = $content_attr->value;
          if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
                            [\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
                            =(.+)\z!sx) {
            $check_charset->($content_attr, $1);
          } else {
            $self->{onerror}->(node => $content_attr,
                               type => 'meta content-type syntax error',
                               level => $self->{must_level});
          }
        }
      } else {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'enumerated:invalid');
      }
    }

    if (defined $charset_attr) {
      $check_charset_decl->();
      $check_charset->($charset_attr, $charset_attr->value);
    }
  },
};
1540    
## |style| element: an embedded style sheet.  Text children are
## buffered in the element state and, when the style sheet language is
## text/css (or no |type| attribute is given), handed to the |onsubdoc|
## handler at the end of the element.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_DEFAULT,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  ## Decides, from the |type| attribute, whether child elements are
  ## tolerated and which style sheet language is recorded.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $type = $item->{node}->get_attribute_ns (undef, 'type');
    if (not defined $type or
        $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      $element_state->{allow_element} = 0;
      $element_state->{style_type} = 'text/css';
    } else {
      $element_state->{allow_element} = 1; # unknown
      $element_state->{style_type} = $type; ## TODO: $type normalization
    }
  },
  ## Child elements are errors unless explicitly added ("plus") or the
  ## style sheet language is unknown.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{allow_element}) {
      #
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  ## Accumulates the text content of the element.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $child_node->text_content;
  },
  ## Passes the buffered text to |onsubdoc| for text/css; any other
  ## language is reported as unsupported.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{style_type} eq 'text/css') {
      ## An element without any text child never touches the buffer;
      ## hand over an empty character string instead of |undef|, since
      ## the subdocument is declared with |is_char_string|.
      my $text = $element_state->{text};
      $text = '' unless defined $text;
      $self->{onsubdoc}->({s => $text,
                           container_node => $item->{node},
                           media_type => 'text/css', is_char_string => 1});
    } else {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'style:'.$element_state->{style_type});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1607 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1608 wakaba 1.1
## |body| element: prose content.  The color and background attributes
## get syntax checkers; their status is FEATURE_M12N10_REC_DEPRECATED.
{
  my $body_attr_checker = {
    alink => $HTMLColorAttrChecker,
    background => $HTMLURIAttrChecker,
    bgcolor => $HTMLColorAttrChecker,
    link => $HTMLColorAttrChecker,
    text => $HTMLColorAttrChecker,
    vlink => $HTMLColorAttrChecker,
  };
  my $body_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    alink => FEATURE_M12N10_REC_DEPRECATED,
    background => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    link => FEATURE_M12N10_REC_DEPRECATED,
    onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    text => FEATURE_M12N10_REC_DEPRECATED,
    vlink => FEATURE_M12N10_REC_DEPRECATED,
  };

  $Element->{$HTML_NS}->{body} = {
    %HTMLProseContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($body_attr_checker,
                                          $body_attr_status),
    ## Marks the |background| URI as an embedded resource.
    check_start => sub {
      my (undef, undef, $state) = @_;
      $state->{uri_info}{background}{type}{embedded} = 1;
    },
  };
}
1638    
## |section|, |nav| and |article| have the same definition: HTML5
## status combined with the prose content checker.  Each element gets
## its own hash.
for my $sectioning_ln (qw/section nav article/) {
  $Element->{$HTML_NS}->{$sectioning_ln} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLProseContentChecker,
  };
}
1653    
## |blockquote| element: prose content with a |cite| URI attribute.
{
  my $bq_attr_checker = {
    cite => $HTMLURIAttrChecker,
  };
  my $bq_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  };

  $Element->{$HTML_NS}->{blockquote} = {
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    %HTMLProseContentChecker,
    check_attrs => $GetHTMLAttrsChecker->($bq_attr_checker, $bq_attr_status),
    ## Flags the |cite| URI as a citation.
    check_start => sub {
      my (undef, undef, $state) = @_;
      $state->{uri_info}{cite}{type}{cite} = 1;
    },
  };
}
1673    
## |aside| element: HTML5 status, prose content.
$Element->{$HTML_NS}{aside} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
};
1678    
## |h1| element: phrasing content.  Seeing one sets the |has_hn| flag,
## which the |header| checker reads.  This definition is also copied
## for |h2|..|h6|.
{
  my $hn_attr_checker = {
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  };
  my $hn_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  };

  $Element->{$HTML_NS}->{h1} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($hn_attr_checker, $hn_attr_status),
    check_start => sub {
      my ($self) = @_;
      $self->{flag}{has_hn} = 1;
    },
  };
}
1698    
## |h2|..|h6| each receive their own shallow copy of the |h1| definition.
for my $heading_ln (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}->{$heading_ln} = {%{$Element->{$HTML_NS}->{h1}}};
}
1708 wakaba 1.1
1709 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1710    
## |header| element: prose content that excludes |header|, |footer| and
## sectioning content, and that must contain at least one heading.
$Element->{$HTML_NS}->{header} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  ## Registers the excluded elements and resets |has_hn|, remembering
  ## the outer value so that |check_end| can restore it.
  check_start => sub {
    my ($self, $item, $state) = @_;
    my $excluded = {$HTML_NS => {header => '1', footer => '1'}};
    $self->_add_minus_elements ($state, $excluded, $HTMLSectioningContent);
    $state->{has_hn_original} = $self->{flag}{has_hn};
    $self->{flag}{has_hn} = 0;
  },
  ## Reports a missing heading, restores |has_hn| and delegates to the
  ## prose content checker.
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->_remove_minus_elements ($state);
    if (not $self->{flag}{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn');
      ## No heading inside: fall back to the value saved at the start.
      $self->{flag}{has_hn} = $state->{has_hn_original};
    }

    $HTMLProseContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
1735    
## |footer| element: prose content that excludes |footer|, sectioning
## content and heading content.
$Element->{$HTML_NS}->{footer} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $state) = @_;
    my $excluded = {$HTML_NS => {footer => 1}};
    $self->_add_minus_elements
        ($state, $excluded, $HTMLSectioningContent, $HTMLHeadingContent);
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->_remove_minus_elements ($state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1753    
## |address| element: prose content that excludes |footer|, |address|,
## sectioning content and heading content.
{
  my $address_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
  };

  $Element->{$HTML_NS}->{address} = {
    %HTMLProseContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, $address_attr_status),
    check_start => sub {
      my ($self, $item, $state) = @_;
      my $excluded = {$HTML_NS => {footer => 1, address => 1}};
      $self->_add_minus_elements
          ($state, $excluded, $HTMLSectioningContent, $HTMLHeadingContent);
    },
    check_end => sub {
      my ($self, $item, $state) = @_;
      $self->_remove_minus_elements ($state);

      $HTMLProseContentChecker{check_end}->(@_);
    },
  };
}
1778    
## |p| element: phrasing content; |align| is an enumerated attribute.
{
  my $p_attr_checker = {
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  };
  my $p_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  };

  $Element->{$HTML_NS}->{p} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($p_attr_checker, $p_attr_status),
  };
}
1794    
## |hr| element: empty content; no element-specific attribute checkers.
{
  my $hr_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    noshade => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    size => FEATURE_M12N10_REC_DEPRECATED,
    width => FEATURE_M12N10_REC_DEPRECATED,
  };

  $Element->{$HTML_NS}->{hr} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, $hr_attr_status),
  };
}
1809    
## |br| element: empty content; |clear| is an enumerated attribute.
{
  my $br_attr_checker = {
    clear => $GetHTMLEnumeratedAttrChecker->({
      left => 1, all => 1, right => 1, none => 1,
    }),
  };
  my $br_attr_status = {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    clear => FEATURE_M12N10_REC_DEPRECATED,
    id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    sdapref => FEATURE_HTML20_RFC,
    style => FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  };

  $Element->{$HTML_NS}->{br} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($br_attr_checker, $br_attr_status),
    ## NOTE: Blank line MUST NOT be used for presentation purpose.
    ## (This requirement is semantic so that we cannot check.)
  };
}
1829    
## |dialog| element: a strictly alternating sequence of |dt| and |dd|
## children.  |phase| records which of the two is expected next.
$Element->{$HTML_NS}->{dialog} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLChecker,
  check_start => sub {
    my (undef, undef, $state) = @_;
    $state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}{$ns}{$ln}) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not $self->{plus_elements}{$ns}{$ln}) {
      if ($state->{phase} eq 'before dt') {
        if ($ns eq $HTML_NS and $ln eq 'dt') {
          $state->{phase} = 'before dd';
        } elsif ($ns eq $HTML_NS and $ln eq 'dd') {
          ## A |dd| with no preceding |dt|; a |dt| is still expected next.
          $self->{onerror}
              ->(node => $child, type => 'ps element missing:dt');
          $state->{phase} = 'before dt';
        } else {
          $self->{onerror}->(node => $child, type => 'element not allowed');
        }
      } elsif ($state->{phase} eq 'before dd') {
        if ($ns eq $HTML_NS and $ln eq 'dd') {
          $state->{phase} = 'before dt';
        } elsif ($ns eq $HTML_NS and $ln eq 'dt') {
          ## Two |dt|s in a row; a |dd| is still expected next.
          $self->{onerror}
              ->(node => $child, type => 'ps element missing:dd');
          $state->{phase} = 'before dd';
        } else {
          $self->{onerror}->(node => $child, type => 'element not allowed');
        }
      } else {
        die "check_child_element: Bad |dialog| phase: $state->{phase}";
      }
    }
  },
  ## Significant text directly inside |dialog| is an error.
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $is_significant;
  },
  ## A trailing |dt| without its |dd| is reported at the end.
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:dd')
        if $state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  },
};
1886    
## |pre| element: phrasing content; |width| is checked as a non-negative
## integer with a range callback that accepts every value.
{
  my $pre_attr_checker = {
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  };
  my $pre_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  };

  $Element->{$HTML_NS}->{pre} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($pre_attr_checker,
                                          $pre_attr_status),
  };
}
1900    
## |ol| element: only |li| children (plus explicitly added elements)
## are allowed, and no significant text.  The |ul| definition is
## derived from this one.
{
  my $ol_attr_checker = {
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    reversed => $GetHTMLBooleanAttrChecker->('reversed'),
    start => $HTMLIntegerAttrChecker,
    ## TODO: HTML4 |type|
  };
  my $ol_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    reversed => FEATURE_HTML5_DEFAULT,
    sdaform => FEATURE_HTML20_RFC,
    #start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
    start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  };

  $Element->{$HTML_NS}->{ol} = {
    %HTMLChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($ol_attr_checker, $ol_attr_status),
    check_child_element => sub {
      my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
      if ($self->{minus_elements}{$ns}{$ln}) {
        $self->{onerror}->(node => $child,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
      } elsif (not $self->{plus_elements}{$ns}{$ln}
               and not ($ns eq $HTML_NS and $ln eq 'li')) {
        $self->{onerror}->(node => $child, type => 'element not allowed');
      }
    },
    check_child_text => sub {
      my ($self, $item, $text_node, $is_significant, $state) = @_;
      $self->{onerror}->(node => $text_node, type => 'character not allowed')
          if $is_significant;
    },
  };
}
1943    
## |ul| element: starts from a copy of the |ol| definition and replaces
## its status and attribute checker.
{
  my $ul_attr_checker = {
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## TODO: HTML4 |type|
    ## TODO: sdaform, align
  };
  my $ul_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  };

  $Element->{$HTML_NS}->{ul} = {
    %{$Element->{$HTML_NS}->{ol}},
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($ul_attr_checker, $ul_attr_status),
  };
}
1961    
## |dir| element: starts from a copy of the |ul| definition and replaces
## its status and attribute checker.
{
  my $dir_attr_checker = {
    compact => $GetHTMLBooleanAttrChecker->('compact'),
  };
  my $dir_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
  };

  $Element->{$HTML_NS}->{dir} = {
    ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
    %{$Element->{$HTML_NS}->{ul}},
    status => FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->($dir_attr_checker,
                                          $dir_attr_status),
  };
}
1978    
## |li| element: prose content, or phrasing content while the |in_menu|
## flag is set.  |value| is reported as unsupported when the parent
## element is not an HTML |ol|.
{
  my $li_attr_checker = {
    ## TODO: HTML4 |type|
    value => sub {
      my ($self, $attr) = @_;
      my $list = $attr->owner_element->manakai_parent_element;
      if (defined $list) {
        my $list_ns = $list->namespace_uri;
        my $list_ln = $list->manakai_local_name;
        my $in_ol = (defined $list_ns and $list_ns eq $HTML_NS
                     and $list_ln eq 'ol');
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute')
            unless $in_ol;
      }
      ## The integer syntax is checked whatever the parent is.
      $HTMLIntegerAttrChecker->($self, $attr);
    }, ## TODO: test
  };
  my $li_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    type => FEATURE_M12N10_REC_DEPRECATED,
    #value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
    #    FEATURE_M12N10_REC_DEPRECATED,
    value => FEATURE_HTML5_DEFAULT | FEATURE_XHTMLBASIC11_CR |
        FEATURE_M12N10_REC,
  };

  $Element->{$HTML_NS}->{li} = {
    %HTMLProseContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($li_attr_checker, $li_attr_status),
    ## Both child callbacks dispatch on the |in_menu| flag.
    check_child_element => sub {
      my ($self) = @_;
      my $content_checker = $self->{flag}{in_menu}
          ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
      $content_checker->{check_child_element}->(@_);
    },
    check_child_text => sub {
      my ($self) = @_;
      my $content_checker = $self->{flag}{in_menu}
          ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
      $content_checker->{check_child_text}->(@_);
    },
  };
}
2028    
## |dl| element: groups of one or more |dt| followed by one or more
## |dd|.  |phase| is one of 'before dt', 'in dts' or 'in dds'.
{
  my $dl_attr_checker = {
    compact => $GetHTMLBooleanAttrChecker->('compact'),
  };
  my $dl_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  };

  $Element->{$HTML_NS}->{dl} = {
    %HTMLChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->($dl_attr_checker, $dl_attr_status),
    check_start => sub {
      my (undef, undef, $state) = @_;
      $state->{phase} = 'before dt';
    },
    check_child_element => sub {
      my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
      if ($self->{minus_elements}{$ns}{$ln}) {
        $self->{onerror}->(node => $child,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
      } elsif (not $self->{plus_elements}{$ns}{$ln}) {
        my $is_dt = ($ns eq $HTML_NS and $ln eq 'dt');
        my $is_dd = ($ns eq $HTML_NS and $ln eq 'dd');
        my $phase = $state->{phase};
        if ($phase eq 'before dt') {
          if ($is_dt) {
            $state->{phase} = 'in dts';
          } elsif ($is_dd) {
            ## A |dd| with no preceding |dt|: report it, then carry on
            ## as if inside the |dd| run.
            $self->{onerror}
                ->(node => $child, type => 'ps element missing:dt');
            $state->{phase} = 'in dds';
          } else {
            $self->{onerror}->(node => $child, type => 'element not allowed');
          }
        } elsif ($phase eq 'in dts') {
          if ($is_dd) {
            $state->{phase} = 'in dds';
          } elsif (not $is_dt) {
            $self->{onerror}->(node => $child, type => 'element not allowed');
          }
        } elsif ($phase eq 'in dds') {
          if ($is_dt) {
            $state->{phase} = 'in dts';
          } elsif (not $is_dd) {
            $self->{onerror}->(node => $child, type => 'element not allowed');
          }
        } else {
          die "check_child_element: Bad |dl| phase: $state->{phase}";
        }
      }
    },
    ## Significant text directly inside |dl| is an error.
    check_child_text => sub {
      my ($self, $item, $text_node, $is_significant, $state) = @_;
      $self->{onerror}->(node => $text_node, type => 'character not allowed')
          if $is_significant;
    },
    ## A final run of |dt|s with no |dd| is reported at the end.
    check_end => sub {
      my ($self, $item, $state) = @_;
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:dd')
          if $state->{phase} eq 'in dts';

      $HTMLChecker{check_end}->(@_);
    },
  };
}
2102    
## |dt| (phrasing content) and |dd| (prose content) differ only in
## their content checker.  The attribute checker is created anew for
## each element.
for my $def ([dt => \%HTMLPhrasingContentChecker],
             [dd => \%HTMLProseContentChecker]) {
  my ($ln, $content_checker) = @$def;
  $Element->{$HTML_NS}->{$ln} = {
    %$content_checker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    }),
  };
}
2124    
## |a| element: phrasing content that excludes interactive content.
## |check_attrs| is written out by hand because the hyperlink-only
## attributes must be rejected when |href| is absent.
$Element->{$HTML_NS}->{a} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Both lookup tables are the same for every attribute, so they are
    ## built once per element rather than once per attribute.

    ## Element-specific checkers for attributes in the null namespace.
    ## The |rel| checker closes over |$item|.
    my $local_checker = {
      accesskey => $HTMLAccesskeyAttrChecker,
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    ## Standardization status of attributes in the null namespace.
    my $attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    };

    ## Attributes handled by |$local_checker|, by local name; consulted
    ## after the loop to enforce the |href| dependency.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        $checker = $local_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      my $status = $attr_status->{$attr_ln};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## No checker and no known status: nothing is reported here.
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status);
      }
    }

    ## Remember the outer |in_a_href| flag so that |check_end| can tell
    ## whether it has to clear the flag again.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## These attributes are only allowed together with |href|.
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed');
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2227    
## |q| element: phrasing content with a |cite| URI attribute.
{
  my $q_attr_checker = {
    cite => $HTMLURIAttrChecker,
  };
  my $q_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdapref => FEATURE_HTML2X_RFC,
    sdasuff => FEATURE_HTML2X_RFC,
  };

  $Element->{$HTML_NS}->{q} = {
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    %HTMLPhrasingContentChecker,
    check_attrs => $GetHTMLAttrsChecker->($q_attr_checker, $q_attr_status),
    ## Flags the |cite| URI as a citation.
    check_start => sub {
      my (undef, undef, $state) = @_;
      $state->{uri_info}{cite}{type}{cite} = 1;
    },
  };
}
2247    
## |cite|, |em| and |strong| have the same definition: phrasing content
## with no element-specific attribute checkers.  Each element gets its
## own hash and its own attribute checker.
for my $phrase_ln (qw/cite em strong/) {
  $Element->{$HTML_NS}->{$phrase_ln} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    }),
  };
}
2280    
## |small|: uses the phrasing content checker; no element-specific
## attribute value checkers, only attribute status flags.
$Element->{$HTML_NS}->{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2290    
## |big|: uses the phrasing content checker.  The element status
## carries only FEATURE_M12N10_REC (no HTML5 flag is set here).
$Element->{$HTML_NS}->{big} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2300    
## |mark|: uses the phrasing content checker unchanged; only the
## element status is declared here.
$Element->{$HTML_NS}->{mark} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
};
2305    
## |dfn|: uses the phrasing content checker, with |dfn| itself added to
## the minus (disallowed descendant) elements while the element is open.
##
## |check_start| also determines the term being defined and registers
## the node under |$self->{term}|; a term that is already registered
## is reported as 'duplicate term'.  The term is taken from:
##
##   1. the |title| attribute of the |dfn| element, if any; otherwise
##   2. the |title| attribute of the child HTML |abbr| element, but
##      only if that |abbr| is the one and only element child and
##      there is no child text other than inter-element whitespace;
##      otherwise
##   3. the |textContent| of the |dfn| element.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      ## Count element children and detect significant text.  The scan
      ## stops as soon as the |abbr| shortcut is known not to apply.
      my $element_count = 0;
      my $only_element;
      my $has_text = 0;
      for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          $only_element = $child;
          last if ++$element_count > 1;
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
            next;
          }
          $has_text = 1;
          last;
        }
      }

      if ($element_count == 1 and not $has_text and
          $only_element->manakai_local_name eq 'abbr') {
        my $nsuri = $only_element->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          my $attr = $only_element->get_attribute_node_ns (undef, 'title');
          $term = $attr->value if $attr;
        }
      }

      unless (defined $term) {
        $term = $node->text_content;
      }
    }
    if ($self->{term}->{$term}) {
      $self->{onerror}->(node => $node, type => 'duplicate term');
      push @{$self->{term}->{$term}}, $node;
    } else {
      $self->{term}->{$term} = [$node];
    }
    ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
    ## has |title|.
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2364    
## |abbr|: uses the phrasing content checker; no element-specific
## attribute value checkers, only attribute status flags.
$Element->{$HTML_NS}->{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2374    
## |acronym|: uses the phrasing content checker.  The element status
## carries only FEATURE_M12N10_REC (no HTML5 flag is set here).
$Element->{$HTML_NS}->{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
2384    
## |time|: uses the phrasing content checker.  The date/time value is
## validated in |check_end|: it is read from the |datetime| attribute
## if present, otherwise from the element's |textContent|.
##
## Accepted shapes (optional spaces around the parts):
##   YYYY-MM-DD T? hh:mm(:ss(.fff)?)? (Z | [+-]hh:mm)?
##   hh:mm(:ss(.fff)?)?
##
## The set of skippable space characters depends on where the value
## comes from (|$reg_sp|): the [\x09-\x0D\x20] class for the attribute
## value, \p{Zs} for element content.
$Element->{$HTML_NS}->{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # checked in |check_end|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_DEFAULT,
  }),
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;
    my $reg_sp;
    my $input_node;
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09-\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{Zs}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    my $hour;
    my $minute;
    my $second;
    if ($input =~ /
          \A
          ${reg_sp}
          ([0-9]+) # 1
          (?>
            -([0-9]+) # 2
            -([0-9]+) # 3
            ${reg_sp}
            (?>
              T
              ${reg_sp}
            )?
            ([0-9]+) # 4
            :([0-9]+) # 5
            (?>
              :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
            )?
            ${reg_sp}
            (?>
              Z
              ${reg_sp}
            |
              [+-]([0-9]+):([0-9]+) # 7, 8
              ${reg_sp}
            )?
            \z
          |
            :([0-9]+) # 9
            (?>
              :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
            )?
            ${reg_sp}\z
          )
        /x) {
      if (defined $2) { ## YYYY-MM-DD T? hh:mm
        if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
            length $4 != 2 or length $5 != 2) {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        if (1 <= $2 and $2 <= 12) {
          ## Day must exist in the month; Feb 29 only in leap years.
          $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
              if $3 < 1 or
                  $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
          $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
              if $2 == 2 and $3 == 29 and
                  not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
        } else {
          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad month');
        }

        ($hour, $minute, $second) = ($4, $5, $6);

        if (defined $7) { ## [+-]hh:mm
          if (length $7 != 2 or length $8 != 2) {
            $self->{onerror}->(node => $input_node,
                               type => 'dateortime:syntax error');
          }

          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad timezone hour')
              if $7 > 23;
          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad timezone minute')
              if $8 > 59;
        }
      } else { ## hh:mm
        if (length $1 != 2 or length $9 != 2) {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        ($hour, $minute, $second) = ($1, $9, $10);
      }

      $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
          if $hour > 23;
      $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
          if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: Integer part of second don't have to have length of two.

        if (substr ($second, 0, 1) eq '.') {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
            if $second >= 60;
      }
    } else {
      $self->{onerror}->(node => $input_node,
                         type => 'dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2521    
## |meter|: uses the phrasing content checker.  All six numeric
## attributes are checked as floating point numbers with no additional
## range restriction (the range callback always returns true).
## TODO: "The recommended way of giving the value is to include it as contents of the element"
$Element->{$HTML_NS}->{meter} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      (map { ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 })) }
           qw(value min low high max optimum)),
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT) }
           qw(high low max min optimum value)),
    },
  ),
};
2542    
## |progress|: uses the phrasing content checker.  |value| is checked
## as a floating point number >= 0, |max| as one > 0.
## TODO: recommended to use content
$Element->{$HTML_NS}->{progress} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
      max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
    },
    {
      %HTMLAttrStatus,
      max => FEATURE_HTML5_DEFAULT,
      value => FEATURE_HTML5_DEFAULT,
    },
  ),
};
2555    
## |code|: uses the phrasing content checker; no element-specific
## attribute value checkers, only attribute status flags.
$Element->{$HTML_NS}->{code} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2566    
## |var|: uses the phrasing content checker; no element-specific
## attribute value checkers, only attribute status flags.
$Element->{$HTML_NS}->{var} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2577    
## |samp|: uses the phrasing content checker; no element-specific
## attribute value checkers, only attribute status flags.
$Element->{$HTML_NS}->{samp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2588    
## |kbd|: uses the phrasing content checker; no element-specific
## attribute value checkers, only attribute status flags.
$Element->{$HTML_NS}->{kbd} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2599    
## |sub|: uses the phrasing content checker; no element-specific
## attribute value checkers, only attribute status flags.
$Element->{$HTML_NS}->{sub} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
    },
  ),
};

## |sup| shares the very same definition hash as |sub| (same reference,
## not a copy).
$Element->{$HTML_NS}->{sup} = $Element->{$HTML_NS}->{sub};
2612 wakaba 1.1
## |span|: uses the phrasing content checker; no element-specific
## attribute value checkers, only attribute status flags.
$Element->{$HTML_NS}->{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML2X_RFC,
    },
  ),
};
2626    
## |i|: uses the phrasing content checker; no element-specific
## attribute value checkers, only attribute status flags.
$Element->{$HTML_NS}->{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};

## |b| shares the very same definition hash as |i| (same reference,
## not a copy).
$Element->{$HTML_NS}->{b} = $Element->{$HTML_NS}->{i};
2639    
## |tt|: uses the phrasing content checker.  The element status
## carries only FEATURE_M12N10_REC (no HTML5 flag is set here).
$Element->{$HTML_NS}->{tt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2650 wakaba 1.51
## |s|: uses the phrasing content checker; the element status is
## FEATURE_M12N10_REC_DEPRECATED.
$Element->{$HTML_NS}->{s} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};

## |strike| and |u| share the very same definition hash as |s| (same
## reference, not a copy).
$Element->{$HTML_NS}->{strike} = $Element->{$HTML_NS}->{s};

$Element->{$HTML_NS}->{u} = $Element->{$HTML_NS}->{s};
2664    
## |bdo|: uses the phrasing content checker.  After the generic
## attribute checks, a missing |dir| attribute is reported.
$Element->{$HTML_NS}->{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: The checker is built on every invocation.
    my $generic_checker = $GetHTMLAttrsChecker->(
      {},
      {
        %HTMLAttrStatus,
        class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        style => FEATURE_XHTML10_REC,
        title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
        sdapref => FEATURE_HTML2X_RFC,
        sdasuff => FEATURE_HTML2X_RFC,
      },
    );
    $generic_checker->($self, $item, $element_state);

    if (not $item->{node}->has_attribute_ns (undef, 'dir')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:dir');
    }
  },
  ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
};
2688    
2689 wakaba 1.29 =pod
2690    
2691     ## TODO:
2692    
2693     +
2694     + <p>Partly because of the confusion described above, authors are
2695     + strongly recommended to always mark up all paragraphs with the
2696     + <code>p</code> element, and to not have any <code>ins</code> or
2697     + <code>del</code> elements that cross across any <span
2698     + title="paragraph">implied paragraphs</span>.</p>
2699     +
2700     (An informative note)
2701    
2702     <p><code>ins</code> elements should not cross <span
2703     + title="paragraph">implied paragraph</span> boundaries.</p>
2704     (normative)
2705    
2706     + <p><code>del</code> elements should not cross <span
2707     + title="paragraph">implied paragraph</span> boundaries.</p>
2708     (normative)
2709    
2710     =cut
2711    
## |ins|: uses the transparent checker.  |cite| is checked as a URI
## and |datetime| with the datetime attribute checker.
$Element->{$HTML_NS}->{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $HTMLDatetimeAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my (undef, undef, $element_state) = @_;

    ## Record that URIs found in |cite| are of type |cite|.
    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
  },
};
2731    
## |del|: uses the transparent checker.  |cite| is checked as a URI
## and |datetime| with the datetime attribute checker.  At the end of
## the element a should-level 'no significant content' error is
## reported unless significant content was seen or the item is flagged
## as transparent.
$Element->{$HTML_NS}->{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $HTMLDatetimeAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my (undef, undef, $element_state) = @_;

    ## Record that URIs found in |cite| are of type |cite|.
    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
};
2763    
## |figure|: prose content plus at most one |legend|, which has to be
## either the first or the last child.
##
## State kept in |$element_state|:
##   has_legend_at_first - a |legend| was seen before any other content
##   has_legend          - the |legend| element seen after other content
##   has_non_legend      - non-|legend| content seen since the last
##                         |legend| (or since the start)
$Element->{$HTML_NS}->{figure} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_FD,
  ## NOTE: legend, Prose | Prose, legend?
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Reports a misplaced |legend| on the given node.
    my $legend_not_allowed = sub {
      $self->{onerror}->(node => $_[0],
                         type => 'element not allowed:figure legend',
                         level => $self->{must_level});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| exists already: this one is redundant.
        $legend_not_allowed->($child_el);
      } elsif ($element_state->{has_legend}) {
        ## The earlier trailing |legend| turns out not to be the last
        ## one: blame it and remember the new one instead.
        $legend_not_allowed->($element_state->{has_legend});
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        $element_state->{has_legend} = $child_el;
      } else {
        $element_state->{has_legend_at_first} = 1;
      }
      delete $element_state->{has_non_legend};
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      unless ($child_is_transparent) {
        $element_state->{has_non_legend} = 1;
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |legend| followed by further content is misplaced.
    if (not $element_state->{has_legend_at_first} and
        $element_state->{has_legend} and
        $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
2822 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2823 wakaba 1.1
## |img|: uses the empty-element checker.  After the generic attribute
## checks, a missing |alt| is reported at should level and a missing
## |src| as an error; URI types for |src|, |lowsrc|, |dynsrc| and
## |longdesc| are then recorded in the element state.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $value_checkers = {
      alt => sub { }, ## NOTE: No syntactical requirement
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        ## |ismap| is reported unless the |in_a_href| flag is set.
        unless ($self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{must_level});
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      ## TODO: height
      ## TODO: width
    };
    my $attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    };
    $GetHTMLAttrsChecker->($value_checkers, $attr_status)
        ->($self, $item, $element_state);

    if (not $item->{node}->has_attribute_ns (undef, 'alt')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:alt',
                         level => $self->{should_level});
    }
    if (not $item->{node}->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }

    $element_state->{uri_info}->{$_}->{type}->{embedded} = 1
        for qw(src lowsrc dynsrc);
    $element_state->{uri_info}->{longdesc}->{type}->{cite} = 1;
  },
};
2878    
## |iframe|: uses the text checker.  |src| is checked as a URI and its
## URI type is recorded as |embedded|.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->(
    {src => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      frameborder => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      longdesc => FEATURE_M12N10_REC,
      marginheight => FEATURE_M12N10_REC,
      marginwidth => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      scrolling => FEATURE_M12N10_REC,
      src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my (undef, undef, $element_state) = @_;

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
2908    
## |embed|: uses the empty-element checker.  Attributes are walked by
## hand because any attribute in the null namespace is accepted:
## |src| is checked as a URI, |type| with the IMT checker, other null
## namespace attributes with their |$HTMLAttrChecker| entry if there is
## one (otherwise unchecked).  Namespaced attributes fall back to
## |$AttrChecker|; when no checker is found they are reported as
## unsupported.  A missing |src| is reported as an error.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status flags by local name.  The table does not depend on the
    ## attribute being examined, so it is built once per element rather
    ## than once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      height => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
      width => FEATURE_HTML5_DEFAULT,
    };

    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } else {
          ## TODO: height
          ## TODO: width
          $checker = $HTMLAttrChecker->{$attr_ln}
              || sub { }; ## NOTE: Any local attribute is ok.
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      my $status = $attr_status->{$attr_ln};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      ## Status information is only reported for null namespace
      ## attributes.
      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status) if $status;
      }
    }

    unless ($has_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
2967    
2968 wakaba 1.49 ## TODO:
2969     ## {applet} FEATURE_M12N10_REC_DEPRECATED
2970     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
2971    
## |object|: uses the transparent checker.  Content model: zero or more
## |param| elements followed by transparent (prose) content.
##
## |check_attrs| runs the generic attribute checks, reports an error
## when neither |data| nor |type| is specified, and records URI types
## for |data|, |classid|, |codebase| and |datasrc|.
##
## |$element_state->{has_non_param}| is set once any non-|param|
## content has been seen; a |param| after that point is an error.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      data => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ## TODO: width
      ## TODO: height
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      data => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_M12N10_REC,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type');
      }
    }

    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  ## NOTE: param*, transparent (Prose)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## A disallowed child still counts as non-|param| content, so
      ## that a later |param| is reported as misplaced.
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate the flag to the state of the real parent.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
3063 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
3064     ## What about |<section><object data><style scoped></style>x</object></section>|?
3065     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
3066 wakaba 1.1
## |param|: uses the empty-element checker.  Both |name| and |value|
## are required (any value is accepted); the URI type of |value| is
## recorded as |resource|.
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $generic_checker = $GetHTMLAttrsChecker->(
      {
        name => sub { },
        value => sub { },
      },
      {
        %HTMLAttrStatus,
        id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        type => FEATURE_M12N10_REC,
        value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
        valuetype => FEATURE_M12N10_REC,
      },
    );
    $generic_checker->(@_);

    if (not $item->{node}->has_attribute_ns (undef, 'name')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:name');
    }
    if (not $item->{node}->has_attribute_ns (undef, 'value')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:value');
    }

    $element_state->{uri_info}->{value}->{type}->{resource} = 1;
  },
};
3095    
## |video| element: transparent content, optionally preceded by
## |source| children.  |source| children are accepted only while the
## element has no |src| attribute and no other content has been seen;
## if |src| is absent, at least one |source| child is required.
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s. Value?
    ## ISSUE: playcount has no conformance criteria
    ## TODO: width, height
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    (map { ($_ => $HTMLURIAttrChecker) } qw/src poster/),
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) } qw/
      autoplay controls end height loopend loopstart
      playcount poster src start width
    /),
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $src_absent = !$item->{node}->has_attribute_ns (undef, 'src');
    $element_state->{allow_source} = $src_absent;
    ## -1 means "a |source| child is required but none has been seen".
    $element_state->{has_source} ||= $src_absent * -1;
      ## NOTE: It might be set true by |check_element|.

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{poster}->{type}->{embedded} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      delete $state->{allow_source};
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly permitted here; it does not end the |source| run.
    } elsif ($ns eq $HTML_NS and $ln eq 'source') {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:prose',
                         level => $self->{must_level})
          unless $state->{allow_source};
      $state->{has_source} = 1;
    } else {
      ## Any other child ends the leading run of |source| elements.
      delete $state->{allow_source};
      $HTMLProseContentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    ## Significant text also ends the leading run of |source| elements.
    delete $state->{allow_source} if $is_significant;
    $HTMLProseContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing:source',
                       level => $self->{must_level})
        if $state->{has_source} == -1;

    ## The remaining end-of-element work is delegated to |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
3172    
## |audio| element: reuses every callback of |video| and overrides only
## the status and the attribute checker (no |poster|, |width| or
## |height|).
$Element->{$HTML_NS}->{audio} = {
  %{$Element->{$HTML_NS}->{video}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s. Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) } qw/
      autoplay controls end loopend loopstart playcount src start
    /),
  }),
};
3195    
## |source| element: an empty element with a mandatory |src| attribute,
## which is recorded as an embedded-resource URI.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check = $GetHTMLAttrsChecker->({
      media => $HTMLMQAttrChecker,
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
    }, {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT) } qw/media src type/),
    });
    $check->(@_);

    $self->{onerror}->(node => $item->{node},
                       type => 'attribute missing:src')
        unless $item->{node}->has_attribute_ns (undef, 'src');

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
3219    
## |canvas| element: transparent content; |height| and |width| are
## checked as non-negative integers with no further range restriction.
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    (map { ($_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })) }
         qw/height width/),
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) } qw/height width/),
  }),
};
3232    
## |map| element: prose content with a mandatory |id| attribute.
## While a |map| is open, |$self->{flag}->{in_map}| is true.
$Element->{$HTML_NS}->{map} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_id;

    my $check_id = sub {
      ## NOTE: same as global |id=""|, with |$self->{map}| registration
      my ($self, $attr) = @_;
      my $id = $attr->value;
      unless (length $id > 0) {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      } elsif ($self->{id}->{$id}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID');
        push @{$self->{id}->{$id}}, $attr;
      } else {
        $self->{id}->{$id} = [$attr];
      }
      $self->{onerror}->(node => $attr, type => 'space in ID')
          if $id =~ /[\x09-\x0D\x20]/;
      ## The first |map| carrying a given ID wins.
      $self->{map}->{$id} ||= $attr;
      $has_id = 1;
    };

    $GetHTMLAttrsChecker->({
      id => $check_id,
    }, {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
        class dir id title
        onclick ondblclick
        onmousedown onmouseup onmouseover onmousemove onmouseout
        onkeypress onkeydown onkeyup
      /),
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
    })->(@_);

    unless ($has_id) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:id');
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## Remember the outer value so that nested |map|s restore it properly.
    $element_state->{in_map_original} = $self->{flag}->{in_map};
    $self->{flag}->{in_map} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{in_map_original}) {
      delete $self->{flag}->{in_map};
    }
    $HTMLProseContentChecker{check_end}->(@_);
  },
};
3294    
## |area| element: an empty element describing a region of an image map.
##
## |check_attrs| does three things:
##   1. runs the per-attribute checkers and status reporting;
##   2. checks which attributes may appear together (with/without |href|);
##   3. validates |coords| against the shape selected by |shape|.
## |check_start| reports an |area| that has a parent element but is not
## inside a |map|.
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my %attr;     ## |area|-specific null-namespace attributes seen
    my $coords;   ## Parsed |coords| list; undef on syntax error

    ## The two lookup tables below do not depend on the attribute being
    ## examined, so they are built once per element rather than once
    ## per attribute.
    my %area_checker = (
      accesskey => $HTMLAccesskeyAttrChecker,
      alt => sub { },
        ## NOTE: |alt| value has no conformance criteria.
      shape => $GetHTMLEnumeratedAttrChecker->({
        circ => -1, circle => 1,
        default => 1,
        poly => 1, polygon => -1,
        rect => 1, rectangle => -1,
      }),
      coords => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
          $coords = [split /,/, $value];
        } else {
          $self->{onerror}->(node => $attr,
                             type => 'coords:syntax error');
        }
      },
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    );

    my %area_status = (
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      coords => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_DEFAULT,
      nohref => FEATURE_M12N10_REC,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_DEFAULT,
      rel => FEATURE_HTML5_DEFAULT,
      shape => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT,
    );

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      ## Checker lookup order: |area|-specific, then the HTML global
      ## attributes, then the generic per-namespace checkers.
      my $checker;
      if ($attr_ns eq '') {
        $checker = $area_checker{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      my $status = $area_status{$attr_ln};

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## No checker and no status entry: nothing is reported here;
        ## |_attr_status_info| below is still called for it.
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $status);
      }
    }

    ## Attribute co-occurrence: |alt| is required with |href|; without
    ## |href| the hyperlink-related attributes (and |alt|) are not allowed.
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      unless (defined $attr{alt}) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:alt');
      }
    } else {
      for (qw/target ping rel media hreflang type alt/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed');
        }
      }
    }

    ## Normalize the shape keyword; a missing or unknown value is
    ## treated as a rectangle.
    my $shape = 'rectangle';
    if (defined $attr{shape}) {
      $shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      }->{lc $attr{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    if ($shape eq 'circle') {
      ## Exactly three numbers; the third (index 2) must not be negative.
      if (defined $attr{coords}) {
        if (defined $coords) {
          if (@$coords == 3) {
            if ($coords->[2] < 0) {
              $self->{onerror}->(node => $attr{coords},
                                 type => 'coords:out of range:2');
            }
          } else {
            $self->{onerror}->(node => $attr{coords},
                               type => 'coords:number:3:'.@$coords);
          }
        } else {
          ## NOTE: A syntax error has been reported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:coords');
      }
    } elsif ($shape eq 'default') {
      ## The default shape takes no coordinates at all.
      if (defined $attr{coords}) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'attribute not allowed');
      }
    } elsif ($shape eq 'polygon') {
      ## At least six numbers, and an even count.
      if (defined $attr{coords}) {
        if (defined $coords) {
          if (@$coords >= 6) {
            unless (@$coords % 2 == 0) {
              $self->{onerror}->(node => $attr{coords},
                                 type => 'coords:number:even:'.@$coords);
            }
          } else {
            $self->{onerror}->(node => $attr{coords},
                               type => 'coords:number:>=6:'.@$coords);
          }
        } else {
          ## NOTE: A syntax error has been reported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:coords');
      }
    } elsif ($shape eq 'rectangle') {
      ## Exactly four numbers, with [0] < [2] and [1] < [3].
      if (defined $attr{coords}) {
        if (defined $coords) {
          if (@$coords == 4) {
            unless ($coords->[0] < $coords->[2]) {
              $self->{onerror}->(node => $attr{coords},
                                 type => 'coords:out of range:0');
            }
            unless ($coords->[1] < $coords->[3]) {
              $self->{onerror}->(node => $attr{coords},
                                 type => 'coords:out of range:1');
            }
          } else {
            $self->{onerror}->(node => $attr{coords},
                               type => 'coords:number:4:'.@$coords);
          }
        } else {
          ## NOTE: A syntax error has been reported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:coords');
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## An |area| with no parent element is not reported here.
    unless ($self->{flag}->{in_map} or
            not $item->{node}->manakai_parent_element) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element not allowed:area',
                         level => $self->{must_level});
    }
  },
};
3487    
## |table| element.
##
## Children must appear in this order, which |check_child_element|
## enforces with the |phase| state machine kept in |$element_state|:
##
##   caption?, colgroup*, thead?, tfoot?, (tbody* | tr+), tfoot?
##
## with at most one |tfoot| in total.  Phases:
##
##   'before caption'    - nothing seen yet
##   'in colgroup'       - after |caption| and/or |colgroup|s
##   'after thead'       - after |thead|
##   'after early tfoot' - after a |tfoot| placed before the body rows
##   'in tbodys'         - one or more |tbody| seen
##   'in trs'            - one or more |tr| seen
##   'after tfoot'       - after a trailing |tfoot|; nothing else allowed
##
## A |tfoot| placed before the body used to switch straight to
## 'in tbodys', which wrongly rejected |tr| children following it.
## The 'after early tfoot' phase accepts either |tbody| or |tr| next,
## while |has_tfoot| still rejects a second |tfoot|.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    frame => $GetHTMLEnumeratedAttrChecker->({
      void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
      lhs => 1, rhs => 1, box => 1, border => 1,
    }),
    rules => $GetHTMLEnumeratedAttrChecker->({
      none => 1, groups => 1, rows => 1, cols => 1, all => 1,
    }),
    summary => sub {}, ## NOTE: %Text; in HTML4.
    ## NOTE(review): HTML 4.01 appears to declare table/@width as
    ## %Length; (pixels or a percentage), not %Pixels; - confirm whether
    ## values such as "50%" should be accepted by this checker.
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    cols => FEATURE_RFC1942,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';

    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{phase} eq 'in tbodys') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        #$element_state->{phase} = 'in tbodys';
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'in trs') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        #$element_state->{phase} = 'in trs';
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after early tfoot') {
      ## A |tfoot| came before the body: the body may still be made of
      ## either |tbody|s or |tr|s.  Another |tfoot| is not allowed.
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after thead') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after early tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'in colgroup') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
        $element_state->{phase} = 'after thead';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after early tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'before caption') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
        $element_state->{phase} = 'after thead';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after early tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after tfoot') {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    } else {
      die "check_child_element: Bad |table| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only insignificant text is allowed directly in |table|.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    Whatpm::HTMLTable->form_table ($item->{node}, sub {
      my %opt = @_;
      ## Errors are re-reported with a |table:| prefix on their type.
      $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
    });
    ## Every checked |table| node is collected in the result.
    push @{$self->{return}->{table}}, $item->{node};

    $HTMLChecker{check_end}->(@_);
  },
};
3624    
## |caption| element: phrasing content; |align| accepts one of the four
## side keywords.
$Element->{$HTML_NS}->{caption} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      (map { ($_ => 1) } qw/top bottom left right/),
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
3639    
## Attribute checkers meant to be shared by the table-related elements
## that splice |%cellalign| into their checker tables.
## The hash is currently empty: the two comments inside only mark where
## the HTML4 %cellhalign; and %cellvalign; attribute groups belong, so
## splicing it in adds no checkers yet.
3640 wakaba 1.69 my %cellalign = (
3641     ## HTML4 %cellhalign;
3642    
3643     ## HTML4 %cellvalign;
3644     );
3645    
## |colgroup| element: may contain only |col| children and insignificant
## text.  |span| must be an integer greater than zero.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw/align char charoff valign width/),
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly permitted here.
    } elsif (not ($ns eq $HTML_NS and $ln eq 'col')) {
      ## Anything but an HTML |col| is rejected.
      $self->{onerror}->(node => $child, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $is_significant;
  },
};
3689    
## |col| element: an empty element; |span| must be an integer greater
## than zero.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw/align char charoff valign width/),
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
};
3708    
## |tbody| element: may contain only |tr| children and insignificant
## text, and must contain at least one |tr|.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw/align char charoff valign/),
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly permitted here; does not count as a row.
    } elsif ($ns eq $HTML_NS and $ln eq 'tr') {
      $state->{has_tr} = 1;
    } else {
      $self->{onerror}->(node => $child, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $is_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:tr')
        unless $state->{has_tr};

    $HTMLChecker{check_end}->(@_);
  },
};
3754    
## |thead| and |tfoot| are checked exactly like |tbody|: each gets its
## own shallow copy of the |tbody| definition.
for my $row_group (qw/thead tfoot/) {
  $Element->{$HTML_NS}->{$row_group} = {
    %{$Element->{$HTML_NS}->{tbody}},
  };
}
3762    
## |tr| element: may contain only |td| / |th| children and insignificant
## text, and must contain at least one cell.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    bgcolor => $HTMLColorAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw/align char charoff valign/),
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly permitted here; does not count as a cell.
    } elsif ($ns eq $HTML_NS and
             ($ln eq 'td' or $ln eq 'th')) {
      $state->{has_cell} = 1;
    } else {
      $self->{onerror}->(node => $child, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $is_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:td|th')
        unless $state->{has_cell};

    $HTMLChecker{check_end}->(@_);
  },
};
3811    
## |td| element: a data cell with prose content.  |colspan| and
## |rowspan| must be integers greater than zero.
$Element->{$HTML_NS}->{td} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->({
      (map { ($_ => 1) } qw/row col rowgroup colgroup/),
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) }
         qw/abbr align axis char charoff headers scope valign/),
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/bgcolor height nowrap width/),
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw/colspan rowspan/),
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
3845    
## |th| element: a header cell with phrasing content.  It takes the same
## attribute checkers as |td|; unlike |td|, its |scope| attribute also
## carries the HTML5 status.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->({
      (map { ($_ => 1) } qw/row col rowgroup colgroup/),
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) }
         qw/abbr align axis char charoff headers valign/),
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/bgcolor height nowrap width/),
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw/colspan rowspan scope/),
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
3879    
## Placeholder attribute checker for attributes whose value syntax is not
## validated yet.  It reports the attribute node through the checker's
## |onerror| callback with level |unsupported| and type |attribute|.
##
##   $checker   - object (hash reference) providing an |onerror| code ref
##   $attr_node - the attribute being checked, passed through as |node|
my $AttrCheckerNotImplemented = sub {
  my ($checker, $attr_node) = @_;
  my $report = $checker->{onerror};
  $report->(node => $attr_node, level => 'unsupported',
            type => 'attribute');
};
3885    
## |form| element.  Content is checked with the prose-content checker.
$Element->{$HTML_NS}{form} = {
  %HTMLProseContentChecker, ## NOTE: Flow* [WF2]
  ## TODO: form in form is allowed in XML [WF2]
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
      'accept-charset' => $AttrCheckerNotImplemented, ## TODO: Charsets
      ## TODO: "User agent behavior for a value other than HTTP URI is
      ## undefined" [HTML4]
      action => $HTMLURIAttrChecker,
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      ## TODO: "multipart/form-data" should be used when type=file is
      ## used [HTML4]
      ## TODO: MUST NOT parameter [WF2]
      enctype => $HTMLIMTAttrChecker,
      ## NOTE: "get" SHOULD be used for idempotent submission,
      ## "post" SHOULD be used otherwise [HTML4].  This cannot be tested.
      method => $GetHTMLEnumeratedAttrChecker->({
        get => 1, post => 1, put => 1, delete => 1,
      }),
      ## CDATA in HTML4
      ## TODO: must be same as |id| (informative!) [XHTML10]
      name => sub { },
      onreceived => $HTMLEventHandlerAttrChecker,
      replace => $GetHTMLEnumeratedAttrChecker->({
        document => 1, values => 1,
      }),
      target => $HTMLTargetAttrChecker,
      ## TODO: Warn for combination whose behavior is not defined.
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accept           => FEATURE_WF2 | FEATURE_M12N10_REC,
      'accept-charset' => FEATURE_M12N10_REC,
      action           => FEATURE_WF2 | FEATURE_M12N10_REC,
      data             => FEATURE_WF2,
      enctype          => FEATURE_WF2 | FEATURE_M12N10_REC,
      lang             => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      method           => FEATURE_WF2 | FEATURE_M12N10_REC,
      name             => FEATURE_M12N10_REC_DEPRECATED,
      onreceived       => FEATURE_WF2,
      onreset          => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onsubmit         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      replace          => FEATURE_WF2,
      sdapref          => FEATURE_HTML20_RFC,
      sdasuff          => FEATURE_HTML20_RFC,
      target           => FEATURE_M12N10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <form>
  check_start => sub {
    my ($checker, $item, $state) = @_;

    ## Record how the URI-valued attributes of this element are used.
    $state->{uri_info}{action}{type}{action} = 1;
    $state->{uri_info}{data}{type}{resource} = 1;
  },
};
3934    
## |fieldset| element.  Content is checked with the prose-content checker.
$Element->{$HTML_NS}{fieldset} = {
  %HTMLProseContentChecker, ## NOTE: legend, %Flow; ## TODO: legend
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      ## TODO: form [WF2]
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_WF2,
      form     => FEATURE_WF2,
      lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <fieldset>
};
3951    
## |input| element.  The element must be empty.  The first hash passed to
## |$GetHTMLAttrsChecker| maps attribute names to value checkers; the
## second maps attribute names to feature status.
$Element->{$HTML_NS}->{input} = {
  %HTMLEmptyChecker, ## MUST [WF2]
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $AttrCheckerNotImplemented, ## TODO: ContentTypes [WF2]
    accesskey => $HTMLAccesskeyAttrChecker,
    action => $HTMLURIAttrChecker,
    align => $GetHTMLEnumeratedAttrChecker->({
      top => 1, middle => 1, bottom => 1, left => 1, right => 1,
    }),
    alt => sub {}, ## NOTE: Text [M12N] ## TODO: |alt| should be provided for |type=image| [HTML4]
    ## NOTE: HTML4 has a "should" for accessibility, which cannot be tested
    ## here.
    autocomplete => $GetHTMLEnumeratedAttrChecker->({on => 1, off => 1}),
    autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    enctype => $HTMLIMTAttrChecker,
    ## TODO: form [WF2]
    ## TODO: inputmode [WF2]
    ismap => $GetHTMLBooleanAttrChecker->('ismap'),
    ## TODO: list [WF2]
    ## TODO: max [WF2]
    maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    ## TODO: min [WF2]
    name => sub {}, ## NOTE: CDATA [M12N]
    readonly => $GetHTMLBooleanAttrChecker->('readonly'),
    replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
    required => $GetHTMLBooleanAttrChecker->('required'),
    size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    src => $HTMLURIAttrChecker,
    ## TODO: step [WF2]
    target => $HTMLTargetAttrChecker,
    ## TODO: template
    type => $GetHTMLEnumeratedAttrChecker->({
      ## [HTML4]
      text => 1, password => 1, checkbox => 1, radio => 1, submit => 1,
      reset => 1, file => 1, hidden => 1, image => 1, button => 1,
      ## [WF2]
      ## NOTE: The WF2 keyword is |datetime|; the misspelling |datatime|
      ## is not a valid value and must not be listed here.
      datetime => 1, 'datetime-local' => 1, date => 1, month => 1, week => 1,
      time => 1, number => 1, range => 1, email => 1, url => 1,
      add => 1, remove => 1, 'move-up' => 1, 'move-down' => 1,
    }),
    usemap => $HTMLUsemapAttrChecker,
    value => sub {}, ## NOTE: CDATA [M12N] ## TODO: "optional except when the type attribute has the value "radio" or "checkbox"" [HTML4] ## TODO: constraints [WF2]
    ## TODO: "authors should ensure that in each set of radio buttons that one is initially "on"." [HTML4] [WF2]
  }, {
    ## Attribute feature status.  Explicit entries override the defaults
    ## spread in from the two shared tables.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_WF2 | FEATURE_M12N10_REC,
    'accept-charset' => FEATURE_HTML2X_RFC,
    accesskey => FEATURE_M12N10_REC,
    action => FEATURE_WF2,
    align => FEATURE_M12N10_REC_DEPRECATED,
    alt => FEATURE_M12N10_REC,
    autocomplete => FEATURE_WF2,
    autofocus => FEATURE_WF2,
    checked => FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    enctype => FEATURE_WF2,
    form => FEATURE_WF2,
    inputmode => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
    ismap => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    list => FEATURE_WF2,
    max => FEATURE_WF2,
    maxlength => FEATURE_WF2 | FEATURE_M12N10_REC,
    method => FEATURE_WF2,
    min => FEATURE_WF2,
    name => FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    readonly => FEATURE_WF2 | FEATURE_M12N10_REC,
    replace => FEATURE_WF2,
    required => FEATURE_WF2,
    sdapref => FEATURE_HTML20_RFC,
    size => FEATURE_WF2_DEPRECATED | FEATURE_M12N10_REC,
    src => FEATURE_M12N10_REC,
    step => FEATURE_WF2,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target => FEATURE_WF2,
    template => FEATURE_WF2,
    type => FEATURE_M12N10_REC,
    usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <input>
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record how the URI-valued attributes of this element are used.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4055    
4056 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
4057    
## |button| element.  Content is checked with the prose-content checker.
$Element->{$HTML_NS}{button} = {
  %HTMLProseContentChecker, ## NOTE: %Flow; - something [XHTML10]
  ## TODO: -A|%formctrl;|form|fieldset [HTML4]
  ## TODO: image map (img) in |button| is "illegal" [HTML4].
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      ## NOTE(review): |enctype| has a status entry below but no value
      ## checker here, unlike |input| -- confirm whether that is intended.
      accesskey => $HTMLAccesskeyAttrChecker,
      action    => $HTMLURIAttrChecker,
      autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
      disabled  => $GetHTMLBooleanAttrChecker->('disabled'),
      ## TODO: form [WF2]
      method    => $GetHTMLEnumeratedAttrChecker->({
        get => 1, post => 1, put => 1, delete => 1,
      }),
      name      => sub { }, ## NOTE: CDATA [M12N]
      oninvalid => $HTMLEventHandlerAttrChecker,
      replace   => $GetHTMLEnumeratedAttrChecker->({
        document => 1, values => 1,
      }),
      target    => $HTMLTargetAttrChecker,
      ## TODO: template [WF2]
      type      => $GetHTMLEnumeratedAttrChecker->({
        button => 1, submit => 1, reset => 1,
      }),
      value     => sub { }, ## NOTE: CDATA [M12N]
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey    => FEATURE_M12N10_REC,
      action       => FEATURE_WF2,
      autofocus    => FEATURE_WF2,
      datafld      => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc      => FEATURE_HTML4_REC_RESERVED,
      disabled     => FEATURE_WF2 | FEATURE_M12N10_REC,
      enctype      => FEATURE_WF2,
      form         => FEATURE_WF2,
      lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      method       => FEATURE_WF2,
      name         => FEATURE_M12N10_REC,
      onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      oninvalid    => FEATURE_WF2,
      replace      => FEATURE_WF2,
      tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target       => FEATURE_WF2,
      template     => FEATURE_WF2,
      type         => FEATURE_M12N10_REC,
      value        => FEATURE_M12N10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <button>
  check_start => sub {
    my ($checker, $item, $state) = @_;

    ## Record how the URI-valued attributes of this element are used.
    $state->{uri_info}{action}{type}{action} = 1;
    $state->{uri_info}{datasrc}{type}{resource} = 1;
  },
};
4115    
## |label| element.  Content is checked with the phrasing-content checker.
$Element->{$HTML_NS}{label} = {
  ## NOTE: %Inline - label [XHTML10] ## TODO: -label
  %HTMLPhrasingContentChecker,
  ## TODO: At most one form control [WF2]
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      accesskey => $HTMLAccesskeyAttrChecker,
      ## TODO: IDREF
      ## TODO: Must be |id| of control [HTML4]
      ## TODO: Or, "may only contain one control element"
      for => $AttrCheckerNotImplemented,
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
      for       => FEATURE_M12N10_REC,
      lang      => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      onblur    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <label>
};
4135    
## |select| element.  Content is currently checked with the prose-content
## checker; the stricter content model is still a TODO.
$Element->{$HTML_NS}{select} = {
  ## TODO: (optgroup|option)* [HTML4] + [WF2]
  ## TODO: SHOULD avoid empty and visible [WF2]
  %HTMLProseContentChecker,
  ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
  ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      accesskey => $HTMLAccesskeyAttrChecker,
      autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
      disabled  => $GetHTMLBooleanAttrChecker->('disabled'),
      data      => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      ## TODO: form [WF2]
      multiple  => $GetHTMLBooleanAttrChecker->('multiple'),
      name      => sub { }, ## NOTE: CDATA [M12N]
      oninvalid => $HTMLEventHandlerAttrChecker,
      ## TODO: pattern [WF2] ## TODO: |title| semantics
      size      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey    => FEATURE_WF2,
      autofocus    => FEATURE_WF2,
      data         => FEATURE_WF2,
      datafld      => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc      => FEATURE_HTML4_REC_RESERVED,
      disabled     => FEATURE_WF2 | FEATURE_M12N10_REC,
      form         => FEATURE_WF2,
      lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      multiple     => FEATURE_M12N10_REC,
      name         => FEATURE_M12N10_REC,
      onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onchange     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      oninvalid    => FEATURE_WF2,
      pattern      => FEATURE_WF2,
      sdaform      => FEATURE_HTML20_RFC,
      sdapref      => FEATURE_HTML20_RFC,
      size         => FEATURE_M12N10_REC,
      tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <select>
  check_start => sub {
    my ($checker, $item, $state) = @_;

    ## Record how the URI-valued attributes of this element are used.
    $state->{uri_info}{data}{type}{resource} = 1;
    $state->{uri_info}{datasrc}{type}{resource} = 1;
  },
};
4186 wakaba 1.1
## |datalist| element.  Content is currently checked with the
## prose-content checker.
$Element->{$HTML_NS}{datalist} = {
  %HTMLProseContentChecker, ## TODO: (transparent | option)*
  ## TODO: |option| child MUST be empty [WF2]
  status => FEATURE_WF2,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      data => FEATURE_WF2,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <datalist>
  check_start => sub {
    my ($checker, $item, $state) = @_;

    ## Record that |data| refers to a resource.
    $state->{uri_info}{data}{type}{resource} = 1;
  },
};
4205 wakaba 1.49
## |optgroup| element.  Content is currently checked with the
## prose-content checker.
$Element->{$HTML_NS}{optgroup} = {
  ## TODO: (option|optgroup)* [HTML4] + [WF2]
  ## SHOULD avoid empty and visible [WF2]
  %HTMLProseContentChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      label    => sub { }, ## NOTE: Text [M12N] ## TODO: required
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
      label    => FEATURE_M12N10_REC,
      lang     => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <optgroup>
};
4222    
## |option| element.  Content is checked with the text checker.  The first
## hash passed to |$GetHTMLAttrsChecker| maps attribute names to value
## checkers; the second maps attribute names to feature status.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub {}, ## NOTE: Text [M12N]
    selected => $GetHTMLBooleanAttrChecker->('selected'),
    value => sub {}, ## NOTE: CDATA [M12N]
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## NOTE: The two flags must be combined with |, not separated by a
    ## comma: a comma would add a stray list element and shift every
    ## key/value pair that follows in this hash.
    disabled => FEATURE_WF2 | FEATURE_M12N10_REC,
    label => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    selected => FEATURE_M12N10_REC,
    value => FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <option>
};
4245 wakaba 1.49
## |textarea| element.  Content is checked with the text checker.
$Element->{$HTML_NS}{textarea} = {
  %HTMLTextChecker,
  status => FEATURE_WF2 | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      accept    => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type
      accesskey => $HTMLAccesskeyAttrChecker,
      autofocus => $GetHTMLBooleanAttrChecker->('autofocus'),
      ## TODO: SHOULD if wrap=hard [WF2]
      cols      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      disabled  => $GetHTMLBooleanAttrChecker->('disabled'),
      ## TODO: form [WF2]
      ## TODO: inputmode [WF2]
      maxlength => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name      => sub { }, ## NOTE: CDATA [M12N]
      ## TODO: pattern [WF2] ## TODO: |title| special semantics
      readonly  => $GetHTMLBooleanAttrChecker->('readonly'),
      required  => $GetHTMLBooleanAttrChecker->('required'),
      rows      => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      oninvalid => $HTMLEventHandlerAttrChecker,
      wrap      => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accept           => FEATURE_WF2,
      'accept-charset' => FEATURE_HTML2X_RFC,
      accesskey        => FEATURE_M12N10_REC,
      autofocus        => FEATURE_WF2,
      cols             => FEATURE_M12N10_REC,
      datafld          => FEATURE_HTML4_REC_RESERVED,
      dataformatas     => FEATURE_HTML4_REC_RESERVED,
      datasrc          => FEATURE_HTML4_REC_RESERVED,
      disabled         => FEATURE_WF2 | FEATURE_M12N10_REC,
      form             => FEATURE_WF2,
      inputmode        => FEATURE_WF2 | FEATURE_XHTMLBASIC11_CR,
      lang             => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      maxlength        => FEATURE_WF2,
      name             => FEATURE_M12N10_REC,
      onblur           => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onchange         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus          => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      oninvalid        => FEATURE_WF2,
      onselect         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      pattern          => FEATURE_WF2,
      readonly         => FEATURE_WF2 | FEATURE_M12N10_REC,
      required         => FEATURE_WF2,
      rows             => FEATURE_M12N10_REC,
      sdaform          => FEATURE_HTML20_RFC,
      sdapref          => FEATURE_HTML20_RFC,
      tabindex         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      wrap             => FEATURE_WF2,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <textarea>
  check_start => sub {
    my ($checker, $item, $state) = @_;

    ## NOTE(review): Neither table above declares a |data| attribute for
    ## this element, while |datasrc| is declared -- confirm which
    ## attribute this URI hint is meant for.
    $state->{uri_info}{data}{type}{resource} = 1;
  },
};
4304 wakaba 1.49
## |output| element.  Content is checked with the phrasing-content
## checker.  No attribute value checkers are implemented yet; only the
## feature status of the attributes is declared.
$Element->{$HTML_NS}{output} = {
  %HTMLPhrasingContentChecker, ## Inline [WF2]
  status => FEATURE_WF2,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers (none so far).
      ## TODO: for [WF2]
      ## TODO: form [WF2]
      ## TODO: name [WF2]
      ## onformchange[WF2]
      ## onforminput[WF2]
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      for          => FEATURE_WF2,
      form         => FEATURE_WF2,
      name         => FEATURE_WF2,
      onchange     => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
      onformchange => FEATURE_WF2,
      onforminput  => FEATURE_WF2,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <output>
  ## NOTE: "The output element should be used when ..." [WF2]
};
4327    
4328     ## TODO: repetition template
4329    
## |isindex| element.  The element must be empty.
$Element->{$HTML_NS}{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      prompt => sub { }, ## NOTE: Text [M12N]
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      class   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      prompt  => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      style   => FEATURE_XHTML10_REC,
      title   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($checker, $item, $state) = @_;

    ## NOTE(review): Neither table above declares an |action| attribute
    ## for this element -- confirm that this URI hint is intended.
    $state->{uri_info}{action}{type}{action} = 1;
  },
};
4355 wakaba 1.49
## |script| element.
##
## With a |src| attribute the element must be empty: any child element
## (other than "plus" elements) or significant text is reported.  Without
## |src|, the effective script type is worked out from |type| and
## |language| and reported as unsupported at the end of the element.
$Element->{$HTML_NS}{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      src   => $HTMLURIAttrChecker,
      defer => $GetHTMLBooleanAttrChecker->('defer'),
      async => $GetHTMLBooleanAttrChecker->('async'),
      type  => $HTMLIMTAttrChecker,
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      async    => FEATURE_HTML5_DEFAULT,
      charset  => FEATURE_M12N10_REC,
      defer    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      event    => FEATURE_HTML4_REC_RESERVED,
      for      => FEATURE_HTML4_REC_RESERVED,
      id       => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      language => FEATURE_M12N10_REC_DEPRECATED,
      src      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my ($checker, $item, $state) = @_;
    my $node = $item->{node};

    if ($node->has_attribute_ns (undef, 'src')) {
      $state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $node->get_attribute_ns (undef, 'type');
      my $language = $node->get_attribute_ns (undef, 'language');

      ## An empty |type| or |language| forces the default type.
      my $has_empty_hint = ((defined $type and $type eq '') or
                            (defined $language and $language eq ''));
      if ($has_empty_hint or not defined $type) {
        ## Only a non-empty |language| without |type| yields a type
        ## other than the default.
        $type = (not $has_empty_hint and defined $language)
            ? 'text/' . $language
            : 'text/javascript';
      }
      $state->{script_type} = $type; ## TODO: $type normalization
    }

    $state->{uri_info}{src}{type}{resource} = 1;
  },
  check_child_element => sub {
    my ($checker, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;
    if ($checker->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly excluded by an ancestor.
      $checker->{onerror}->(node => $child_el,
                            type => 'element not allowed:minus',
                            level => $checker->{must_level});
    } elsif ($checker->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor: nothing to report.
    } elsif ($state->{must_be_empty}) {
      $checker->{onerror}->(node => $child_el,
                            type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($checker, $item, $child_node, $has_significant, $state) = @_;
    if ($state->{must_be_empty} and $has_significant) {
      $checker->{onerror}->(node => $child_node,
                            type => 'character not allowed');
    }
  },
  check_end => sub {
    ## |@_| is left untouched because it is forwarded below.
    my ($checker, $item, $state) = @_;
    if (not $state->{must_be_empty}) {
      $checker->{onerror}->(node => $item->{node}, level => 'unsupported',
                            type => 'script:'.$state->{script_type});
      ## TODO: text/javascript support

      $HTMLChecker{check_end}->(@_);
    }
  },
};
4436 wakaba 1.25 ## ISSUE: Significant check and text child node
4437 wakaba 1.1
4438     ## NOTE: When script is disabled.
## |noscript| element.
##
## Outside |head| (the |in_head| flag is false) the element behaves as a
## transparent element in which nested |noscript| is excluded.  Inside
## |head| only |link|, |style| without |scoped|, and |meta| with an
## |http-equiv| other than |content-type| are accepted as children, and
## significant text is reported.
$Element->{$HTML_NS}{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($checker, $item, $state) = @_;
    my $node = $item->{node};

    if (not $node->owner_document->manakai_is_html) {
      $checker->{onerror}->(node => $node, type => 'in XML:noscript');
    }

    if (not $checker->{flag}->{in_head}) {
      $checker->_add_minus_elements ($state,
                                     {$HTML_NS => {noscript => 1}});
    }
  },
  check_child_element => sub {
    ## |@_| is left untouched because it is forwarded below.
    my ($checker, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;

    unless ($checker->{flag}->{in_head}) {
      return $HTMLTransparentChecker{check_child_element}->(@_);
    }

    if ($checker->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $checker->{onerror}->(node => $child_el,
                            type => 'element not allowed:minus',
                            level => $checker->{must_level});
    } elsif ($checker->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor: nothing to report.
    } else {
      ## Decide whether this child is forbidden in |head|'s |noscript|.
      my $forbidden;
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
        $forbidden = 0;
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
        $forbidden = $child_el->has_attribute_ns (undef, 'scoped');
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
        my $http_equiv_attr
            = $child_el->get_attribute_node_ns (undef, 'http-equiv');
        ## TODO: case
        $forbidden = (not $http_equiv_attr or
                      lc $http_equiv_attr->value eq 'content-type');
      } else {
        $forbidden = 1;
      }

      if ($forbidden) {
        $checker->{onerror}->(node => $child_el,
                              type => 'element not allowed:head noscript',
                              level => $checker->{must_level});
      }
    }
  },
  check_child_text => sub {
    ## |@_| is left untouched because it is forwarded below.
    my ($checker, $item, $child_node, $has_significant, $state) = @_;

    unless ($checker->{flag}->{in_head}) {
      return $HTMLTransparentChecker{check_child_text}->(@_);
    }

    if ($has_significant) {
      $checker->{onerror}->(node => $child_node,
                            type => 'character not allowed');
    }
  },
  check_end => sub {
    ## |@_| is left untouched because it is forwarded below.
    my ($checker, $item, $state) = @_;
    $checker->_remove_minus_elements ($state);

    ## Pick the base checker whose |check_end| finishes the element.
    my $base = $checker->{flag}->{in_head}
        ? \%HTMLChecker
        : \%HTMLPhrasingContentChecker;
    $base->{check_end}->(@_);
  },
};
4524 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
4525 wakaba 1.1
## |event-source| element.  The element must be empty.
$Element->{$HTML_NS}{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      src => $HTMLURIAttrChecker,
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      src => FEATURE_HTML5_LC,
    },
  ),
  check_start => sub {
    my ($checker, $item, $state) = @_;

    ## Record that |src| refers to a resource.
    $state->{uri_info}{src}{type}{resource} = 1;
  },
};
4541    
## |details| element: a |legend| followed by prose content.
##
## |has_legend| records that a |legend| child was seen; |has_non_legend|
## records that some child (a |legend| included) or significant text has
## already been seen, so that a later |legend| can be reported.
$Element->{$HTML_NS}{details} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute value checkers.
      open => $GetHTMLBooleanAttrChecker->('open'),
    },
    {
      ## Attribute feature status.
      %HTMLAttrStatus,
      open => FEATURE_HTML5_WD,
    },
  ),
  ## NOTE: legend, Prose
  check_child_element => sub {
    ## |@_| is left untouched because it is forwarded below.
    my ($checker, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;
    if ($checker->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $checker->{onerror}->(node => $child_el,
                            type => 'element not allowed:minus',
                            level => $checker->{must_level});
      $state->{has_non_legend} = 1;
    } elsif ($checker->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor: nothing to record.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## A |legend| is only allowed before any other content.
      if ($state->{has_non_legend}) {
        $checker->{onerror}->(node => $child_el,
                              type => 'element not allowed:details legend',
                              level => $checker->{must_level});
      }
      $state->{has_legend} = 1;
      $state->{has_non_legend} = 1;
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      unless ($child_is_transparent) {
        $state->{has_non_legend} = 1;
      }
      ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
      ## is conforming?
    }
  },
  check_child_text => sub {
    my ($checker, $item, $child_node, $has_significant, $state) = @_;
    $state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    ## |@_| is left untouched because it is forwarded below.
    my ($checker, $item, $state) = @_;

    if (not $state->{has_legend}) {
      $checker->{onerror}->(node => $item->{node},
                            type => 'element missing:legend',
                            level => $checker->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<details><legend>aa</legend></details>| error?
  },
};
4596    
## |datagrid| element definition.
##
## Content model:
##   Prose -(text* table Prose*) | table | select | datalist | Empty
##
## It is tracked by |$state->{phase}|, which takes these values:
##   'any'   - nothing decisive seen yet (initial value).
##   'prose' - prose content; a |table| as the first child element is
##             rejected.
##   'none'  - a single |table|, |select| or |datalist| has been seen;
##             no further element or significant text is allowed.
## |$state->{has_element}| records whether a child element has already
## been seen in the prose case.
$Element->{$HTML_NS}{datagrid} = {
  %HTMLProseContentChecker,
  status      => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## |a| and |datagrid| are registered as minus elements for the
    ## duration of this element (removed again in |check_end|).
    $self->_add_minus_elements ($state,
                                {$HTML_NS => {a => 1, datagrid => 1}});
    $state->{phase} = 'any';
  },
  ## Prose -(text* table Prose*) | table | select | datalist | Empty
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    my $phase = $state->{phase};
    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node  => $child,
                         type  => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } elsif ($phase eq 'prose') {
      ## The child must be prose content, and a |table| must not be the
      ## first child element.
      my $is_prose = $HTMLProseContent->{$ns}->{$ln};
      my $is_leading_table = (not $state->{has_element}
                              and $ns eq $HTML_NS
                              and $ln eq 'table');
      if (not $is_prose or $is_leading_table) {
        $self->{onerror}->(node => $child,
                           type => 'element not allowed');
      }
      $state->{has_element} = 1;
    } elsif ($phase eq 'any') {
      my %sole_child = (table => 1, select => 1, datalist => 1);
      if ($ns eq $HTML_NS and $sole_child{$ln}) {
        $state->{phase} = 'none';
      } elsif ($HTMLProseContent->{$ns}->{$ln}) {
        $state->{has_element} = 1;
        $state->{phase} = 'prose';
        ## TODO: transparent?
      } else {
        $self->{onerror}->(node => $child,
                           type => 'element not allowed');
      }
    } elsif ($phase eq 'none') {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed');
    } else {
      die "check_child_element: Bad |datagrid| phase: $phase";
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    if ($has_significant) {
      my $phase = $state->{phase};
      if ($phase eq 'any') {
        ## Leading text selects the prose alternative.
        $state->{phase} = 'prose';
      } elsif ($phase ne 'prose') {
        $self->{onerror}->(node => $text_node,
                           type => 'character not allowed');
      }
    }
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->_remove_minus_elements ($state);

    ## Choose the end-of-element check according to the final phase.
    my $finish = $state->{phase} eq 'none'
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $finish->(@_);
  },
  ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
  ## are not disallowed (assuming that form control contents are also
  ## prose content).
};
4686    
## |command| element definition.
##
## Built on |%HTMLEmptyChecker|.  The |type| attribute must be exactly one
## of |command|, |checkbox| or |radio| (compared as-is, case-sensitively).
$Element->{$HTML_NS}{command} = {
  %HTMLEmptyChecker,
  status      => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    checked    => $GetHTMLBooleanAttrChecker->('checked'),
    default    => $GetHTMLBooleanAttrChecker->('default'),
    disabled   => $GetHTMLBooleanAttrChecker->('disabled'),
    hidden     => $GetHTMLBooleanAttrChecker->('hidden'),
    icon       => $HTMLURIAttrChecker,
    label      => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    type       => sub {
      my ($self, $attr) = @_;
      my %allowed_type = (command => 1, checkbox => 1, radio => 1);
      $self->{onerror}->(node => $attr, type => 'attribute value not allowed')
          unless $allowed_type{$attr->value};
    },
  }, {
    %HTMLAttrStatus,
    checked    => FEATURE_HTML5_WD,
    default    => FEATURE_HTML5_WD,
    disabled   => FEATURE_HTML5_WD,
    hidden     => FEATURE_HTML5_WD,
    icon       => FEATURE_HTML5_WD,
    label      => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type       => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## Record in the per-element URI information that |icon| is of
    ## type "embedded".
    $state->{uri_info}{icon}{type}{embedded} = 1;
  },
};
4722    
## |menu| element definition.
##
## Content model enforced here: either only |li| children or only
## phrasing content, never a mixture.  It is tracked by
## |$state->{phase}|, which takes these values:
##   'li or phrasing' - nothing decisive seen yet (initial value).
##   'li'             - an |li| child has been seen.
##   'phrasing'       - phrasing content or significant text has been seen.
##
## While a |menu| is open, |$self->{flag}->{in_menu}| is set; the previous
## value is saved in |$state->{in_menu_original}| so that nested |menu|
## elements do not clear the flag of an outer one.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
      ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    id => sub {
      ## NOTE: same as global |id=""|, with |$self->{menu}| registration
      my ($self, $attr) = @_;
      my $value = $attr->value;
      if (length $value > 0) {
        if ($self->{id}->{$value}) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @{$self->{id}->{$value}}, $attr;
        } else {
          $self->{id}->{$value} = [$attr];
        }
      } else {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      }
      if ($value =~ /[\x09-\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID');
      }
      ## Only the first |menu| with a given ID is registered.
      $self->{menu}->{$value} ||= $attr;
      ## ISSUE: <menu id=""><p contextmenu=""> match?
    },
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_WD,
    ## NOTE: The key has to be |compact| (it used to be misspelled
    ## |compat|) so that it names the same attribute as the |compact|
    ## checker above.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';
    ## Remember the outer value so that |check_end| can restore it.
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        ## |li| after phrasing content.
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        ## Phrasing content after |li|.
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        ## Significant text after |li|.
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Clear the flag only if it was not already set by an outer |menu|.
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
4825    
## |datatemplate| element definition.
##
## The only child elements accepted are HTML |rule| elements (plus
## anything listed in |plus_elements|); significant text is rejected.
## |is_xml_root| is set for this element.
$Element->{$HTML_NS}{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node  => $child,
                         type  => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not $self->{plus_elements}->{$ns}->{$ln}
             and not ($ns eq $HTML_NS and $ln eq 'rule')) {
      ## Neither a plus element nor an HTML |rule|.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:datatemplate');
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $has_significant;
  },
  is_xml_root => 1,
};
4853    
## |rule| element definition.
##
## While a |rule| is open, the HTML |nest| element is registered as a
## plus element.  Child elements and text are not checked at all.
$Element->{$HTML_NS}{rule} = {
  %HTMLChecker,
  status      => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    condition => $HTMLSelectorsAttrChecker,
    mode      => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
  }, {
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode      => FEATURE_HTML5_AT_RISK,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;
    $self->_add_plus_elements ($state, {$HTML_NS => {nest => 1}});
  },
  ## Deliberately empty: see the NOTE at the end of this definition.
  check_child_element => sub { },
  check_child_text    => sub { },
  check_end => sub {
    my ($self, $item, $state) = @_;
    ## Undo the registration made in |check_start|, then run the
    ## generic end-of-element check.
    $self->_remove_plus_elements ($state);
    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
4880    
## |nest| element definition.
##
## Built on |%HTMLEmptyChecker|.  The |mode| attribute value must be
## non-empty and must not contain any of the characters U+0009..U+000D
## or U+0020.
$Element->{$HTML_NS}{nest} = {
  %HTMLEmptyChecker,
  status      => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    filter => $HTMLSelectorsAttrChecker,
    mode   => sub {
      my ($self, $attr) = @_;
      unless ($attr->value =~ /\A[^\x09-\x0D\x20]+\z/) {
        $self->{onerror}->(node => $attr, type => 'mode:syntax error');
      }
    },
  }, {
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode   => FEATURE_HTML5_AT_RISK,
  }),
};
4899    
## |legend| element definition.
##
## Built on |%HTMLPhrasingContentChecker|.  Only |accesskey| has an
## element-specific attribute checker; the checker for |align| is
## currently commented out, although a status is still declared for it.
$Element->{$HTML_NS}{legend} = {
  %HTMLPhrasingContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accesskey => $HTMLAccesskeyAttrChecker,
#    align => $GetHTMLEnumeratedAttrChecker->({
#      top => 1, bottom => 1, left => 1, right => 1,
#    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC,
    align     => FEATURE_M12N10_REC_DEPRECATED,
    lang      => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
};
4916    
## |div| element definition.
##
## Built on |%HTMLProseContentChecker|.  The |align| attribute is an
## enumerated attribute accepting |left|, |center|, |right| and |justify|.
$Element->{$HTML_NS}{div} = {
  %HTMLProseContentChecker,
  status      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align        => FEATURE_M12N10_REC_DEPRECATED,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    lang         => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## Record in the per-element URI information that |datasrc| is of
    ## type "resource".
    $state->{uri_info}{datasrc}{type}{resource} = 1;
  },
};
4939    
## |center| element definition.
##
## Built on |%HTMLProseContentChecker|, with deprecated status and no
## element-specific attribute checkers.
$Element->{$HTML_NS}{center} = {
  %HTMLProseContentChecker,
  status      => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
4949    
## |font| element definition.
##
## Built on |%HTMLTransparentChecker|.  No element-specific attribute
## checkers are implemented yet (TODO); only attribute statuses are
## declared.
$Element->{$HTML_NS}{font} = {
  %HTMLTransparentChecker,
  status      => FEATURE_HTML5_AT_RISK | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->({ ## TODO
  }, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    color => FEATURE_M12N10_REC_DEPRECATED,
    dir   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    face  => FEATURE_M12N10_REC_DEPRECATED,
    id    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang  => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    size  => FEATURE_M12N10_REC_DEPRECATED,
    style => FEATURE_HTML5_AT_RISK | FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
};
4967 wakaba 1.49
## |basefont| element definition.
##
## Built on |%HTMLEmptyChecker|, with deprecated status.  Checkers for
## |color|, |face| and |size| are not implemented yet; only attribute
## statuses are declared.
$Element->{$HTML_NS}{basefont} = {
  %HTMLEmptyChecker,
  status      => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: color, face, size
  }, {
    %HTMLAttrStatus,
    color => FEATURE_M12N10_REC_DEPRECATED,
    face  => FEATURE_M12N10_REC_DEPRECATED,
    #id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
    id    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    size  => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
4982    
4983 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
4984     ## class title id cols rows onload onunload style(x10)
4985     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
4986     ## noframes Common, lang(xhtml10)
4987    
4988     ## TODO: CR: ruby rb rt rp rbc rtc @rbspan
4989 wakaba 1.56
4990 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
4991     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
4992     ## xmp, listing sdapref[HTML2,0]
4993    
4994 wakaba 1.56 =pod
4995    
4996     WF2: Documents MUST comply to [CHARMOD].
4997     WF2: Vendor extensions MUST NOT be used.
4998    
4999 wakaba 1.61 HTML 2.0 nextid @n
5000    
5001     RFC 2659: CERTS CRYPTOPTS
5002    
5003     ISO-HTML: pre-html, divN
5004    
5005 wakaba 1.56 =cut
5006 wakaba 1.61
5007     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
5008     ## We added them only to |a|. |link| and |form| might also allow them
5009     ## in theory.
5010 wakaba 1.1
## Set the |loaded| flag on the HTML namespace entry of
## |$Whatpm::ContentChecker::Namespace|.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

## A file loaded by |require| has to end with a true value.
1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24