/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.175 - (hide annotations) (download)
Sun Jul 5 06:11:35 2009 UTC (16 years ago) by wakaba
Branch: MAIN
Changes since 1.174: +1 -1 lines
++ whatpm/t/dom-conformance/ChangeLog	5 Jul 2009 06:09:10 -0000
	* html-interactive-1.dat: The |default| attribute of the |command|
	element is tentatively disallowed (HTML5 revision 3067).

2009-07-05  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	5 Jul 2009 06:09:40 -0000
	* HTML.pm: Disallow |default| attribute of the |command| element
	for now (HTML5 revision 3067).

2009-07-05  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.117 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6    
7 wakaba 1.1 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8    
9 wakaba 1.174 ## --- Feature Status ---
10    
## HTML5 feature statuses.  Each constant is a bit combination of a
## |FEATURE_STATUS_*| maturity level and, for features that are
## currently part of the specification, the |FEATURE_ALLOWED| flag.

## NOTE: Part of HTML5, the implemented status.
sub FEATURE_HTML5_COMPLETE () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

## NOTE: Part of HTML5, the awaiting implementation feedback status.
sub FEATURE_HTML5_CR () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

## NOTE: Part of HTML5, the last call of comments status.
sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

## NOTE: Part of HTML5, but in the being considered for removal
## status.
sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

## NOTE: Part of HTML5, the working draft status.
sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

## NOTE: Part of HTML5, the first draft status.
sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

## NOTE: Part of HTML5, but not annotated.
sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

## NOTE: Was part of HTML5, in a status before the last call of
## comments, but then dropped (hence no |FEATURE_ALLOWED|).
sub FEATURE_HTML5_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_WD () }

## NOTE: Was part of HTML5, in the last call of comments status, but
## then dropped (hence no |FEATURE_ALLOWED|).
sub FEATURE_HTML5_LC_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_LC () }
57 wakaba 1.154
## Web Forms 2.0 feature statuses.

## NOTE: Defined in WF2 (whether deprecated or not) and then
## incorporated into the HTML5 spec.
sub FEATURE_WF2X () { Whatpm::ContentChecker::FEATURE_STATUS_LC () }

## NOTE: Features introduced or modified in WF2, which were not
## merged into HTML5.
sub FEATURE_WF2 () { Whatpm::ContentChecker::FEATURE_STATUS_LC () }

## NOTE: Features mentioned in WF2's informative appendix A, which
## were not merged into HTML5.
sub FEATURE_WF2_INFORMATIVE () { Whatpm::ContentChecker::FEATURE_STATUS_LC () }
73 wakaba 1.49
## RDFa feature statuses.

sub FEATURE_RDFA_REC () { Whatpm::ContentChecker::FEATURE_STATUS_REC () }

## NOTE: The feature that was defined in a RDFa last call working
## draft, but then dropped.
sub FEATURE_RDFA_LC_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_LC () }
82 wakaba 1.58
## NOTE: The XHTML Role LCWD has almost no information on how the
## |role| attribute can be used - the only requirement for that matter
## is: "the attribute MUST be referenced using its namespace-qualified
## form" (and this is a host language conformance!).
sub FEATURE_ROLE_LC () { Whatpm::ContentChecker::FEATURE_STATUS_LC () }

## NOTE: XHTML 2.0 Editor's Draft, in which the namespace URI is
## "http://www.w3.org/1999/xhtml".
sub FEATURE_XHTML2_ED () { Whatpm::ContentChecker::FEATURE_STATUS_WD () }
96 wakaba 1.58
## NOTE: XHTML Basic 1.1 Recommendation, new features (not in XHTML
## M12N).
sub FEATURE_XHTMLBASIC11_CR () { Whatpm::ContentChecker::FEATURE_STATUS_REC () }

## NOTE: XHTML Basic 1.1 Recommendation, new but deprecated
## features.
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO ()
}
108    
sub FEATURE_RUBY_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR () }

## NOTE: XHTML M12N 1.1 Recommendation, new features (not in 1.0).
sub FEATURE_M12N11_LC () { Whatpm::ContentChecker::FEATURE_STATUS_REC () }

## NOTE: M12N10 status is based on its abstract module definition.
## It contains a number of problems.  (However, again, it's a REC!)

## NOTE: Oh, XHTML m12n 1.0 passed the CR phase!  W3C Process sucks!
sub FEATURE_M12N10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_REC () }

sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO ()
}
128 wakaba 1.49
## NOTE: XHTML10 status is based on its transitional and frameset DTDs
## (second edition).  Only attributes missing from the M12N10 abstract
## definition are added.
sub FEATURE_XHTML10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR () }

## NOTE: Diff from HTML4.  (Informative documentation.)
sub FEATURE_ISOHTML_PREPARATION () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}

## NOTE: HTML4 status is based on its transitional and frameset DTDs
## (HTML 4.01).  Only attributes missing from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}
146    
147     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
148     ## rather than presentational attributes (deprecated or not deprecated).
149 wakaba 1.48
## NOTE: Diff from HTML4.  The deprecation is a lowercase normative
## "should".
sub FEATURE_HTML32_REC_OBSOLETE () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD ()
}

## Experimental RFC.
sub FEATURE_RFC2659 () { Whatpm::ContentChecker::FEATURE_STATUS_CR () }

## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
## (Proposed Standard, obsolete.)
sub FEATURE_HTML2X_RFC () { Whatpm::ContentChecker::FEATURE_STATUS_CR () }

## NOTE: Diff from HTML 2.0.  (Experimental RFC, obsolete.)
sub FEATURE_RFC1942 () { Whatpm::ContentChecker::FEATURE_STATUS_CR () }

## NOTE: Diff from HTML 3.2.  (Proposed Standard, obsolete.)
sub FEATURE_HTML20_RFC () { Whatpm::ContentChecker::FEATURE_STATUS_CR () }
175 wakaba 1.58
176 wakaba 1.174 ## --- Content Model ---
177    
178 wakaba 1.29 ## December 2007 HTML5 Classification
179    
## Elements classified as metadata content, as a
## {namespace URI => {local name => 1}} table.
my $HTMLMetadataContent = {
  $HTML_NS => {
    ## NOTE: A |meta| with no |name| element is not allowed as
    ## a metadata content other than |head| element.
    map { ($_ => 1) } qw(
      title base link style script noscript
      event-source eventsource
      command datatemplate
      meta
    ),
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
193    
## Elements classified as flow content, as a
## {namespace URI => {local name => 1}} table.
my $HTMLFlowContent = {
  $HTML_NS => {
    ## ISSUE: |details| and |datagrid| are "Flow element" in spec.
    ## ISSUE: |div| has no category in spec.
    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    (map { ($_ => 1) } qw(
      section nav article blockquote aside
      h1 h2 h3 h4 h5 h6 header
      footer address p hr dialog pre
      ol ul dl figure map table
      form fieldset
      details
      datagrid
      datatemplate
      div
      style
    )),

    ## ISSUE: |datagrid| is "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    (map { ($_ => 1) } qw(
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo ruby
      script noscript event-source eventsource
      command bb
      input button label select datalist
      textarea output
      datagrid
      area
    )),

    ## NOTE: Transparent.
    (map { ($_ => 1) } qw(a ins del font)),

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
    menu => 1,

    (map { ($_ => 1) } qw(
      img iframe embed object video audio
      canvas
    )),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
236    
## Elements classified as sectioning content.
my $HTMLSectioningContent = {
  $HTML_NS => {
    ## NOTE: |body| is only allowed in |html| element.
    map { ($_ => 1) } qw(section nav article aside body),
  },
};
244    
## Elements classified as sectioning roots.
my $HTMLSectioningRoot = {
  $HTML_NS => {
    map { ($_ => 1) } qw(blockquote datagrid figure td),
  },
};
250    
## Elements classified as heading content.
my $HTMLHeadingContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw(h1 h2 h3 h4 h5 h6 header),
  },
};
256    
## Elements classified as phrasing content, as a
## {namespace URI => {local name => 1}} table.
my $HTMLPhrasingContent = {
  ## NOTE: All phrasing content is also flow content.
  $HTML_NS => {
    ## ISSUE: |datagrid| is "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    (map { ($_ => 1) } qw(
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo ruby
      script noscript event-source eventsource
      command bb
      input button label select datalist
      textarea output
      datagrid
      area
    )),

    ## NOTE: Transparent.
    (map { ($_ => 1) } qw(a ins del font)),

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
    menu => 1,

    (map { ($_ => 1) } qw(
      img iframe embed object video audio
      canvas
    )),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## NOTE: And non-inter-element-whitespace text nodes.
};
288    
289 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
290 wakaba 1.29
## Elements classified as interactive content.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      a
      label input button select textarea
      details datagrid bb
    )),

    ## NOTE: When "controls" attribute is specified.
    (map { ($_ => 1) } qw(video audio)),

    ## NOTE: When "type=toolbar" attribute is specified.
    menu => 1,
  },
};
304    
## NOTE: Labelable form-associated elements, as a
## {namespace URI => {local name => 1}} table.
my $LabelableFAE = {
  $HTML_NS => {
    map { ($_ => 1) } qw(input button select textarea),
  },
};
311    
312 wakaba 1.130 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
313    
314 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
315     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
316    
317     ## -- Common attribute syntax checkers
318    
319 wakaba 1.1 our $AttrChecker;
320 wakaba 1.82 our $AttrStatus;
321 wakaba 1.1
## Returns a checker for an enumerated attribute.
##
## |$states| maps each keyword, in lowercase, to a positive value if
## the keyword is conforming or to a negative value if it is known but
## non-conforming.  The returned checker takes |($self, $attr)| and
## reports through |$self->{onerror}|, at the |must| level:
##
##   - |enumerated:non-conforming| for a known, non-conforming keyword;
##   - |enumerated:invalid| for a value that is not a keyword at all.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;

    ## Keywords are compared ASCII case-insensitively.  |lc| would
    ## also fold non-ASCII characters (e.g. U+212A KELVIN SIGN to
    ## "k"), letting look-alike values pass as keywords, so only A-Z
    ## are folded here.
    my $value = $attr->value;
    $value =~ tr/A-Z/a-z/;

    my $state = $states->{$value} || 0; # 0 for an unknown keyword
    if ($state > 0) {
      #
    } elsif ($state) {
      $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming',
                         level => $self->{level}->{must});
    } else {
      $self->{onerror}->(node => $attr, type => 'enumerated:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLEnumeratedAttrChecker
338    
## Returns a checker for a boolean attribute.
##
## |$local_name| is the attribute's name in lowercase.  The returned
## checker takes |($self, $attr)| and reports |boolean:invalid| at the
## |must| level unless the value is the empty string or an ASCII
## case-insensitive match for |$local_name|.
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;

    ## Fold A-Z only: |lc| would also fold non-ASCII characters
    ## (e.g. U+212A KELVIN SIGN to "k") and accept look-alike values.
    my $value = $attr->value;
    $value =~ tr/A-Z/a-z/;

    unless ($value eq $local_name or $value eq '') {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLBooleanAttrChecker
350    
## Unordered set of unique space-separated tokens.
##
## Returns a checker taking |($self, $attr)|.  Every repeated token is
## reported as |duplicate token|; if |$allowed_words| (a hash
## reference) is given, the first occurrence of a token that is not a
## true key of it is reported as |word not allowed|.  Both errors are
## raised at the |must| level.
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my $allowed_words = shift;
  return sub {
    my ($self, $attr) = @_;
    my %seen;
    my @token = grep {length $_}
        split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
    for my $token (@token) {
      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
        next;
      }
      $seen{$token} = 1;

      ## Without a vocabulary, any token is acceptable.
      next if not defined $allowed_words;
      next if $allowed_words->{$token};

      $self->{onerror}->(node => $attr, type => 'word not allowed',
                         value => $token,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
377 wakaba 1.8
## |rel| attribute (set of space separated tokens,
## whose allowed values are defined by the section on link types)
##
## Arguments: |$a_or_area| is the index into each link type's |effect|
## array (false for the first slot, true for the second), |$todo|
## receives the |has_hyperlink_link_type| flag, and |$element_state|
## receives |uri_info| and |link_rel|.  The link type definitions come
## from |Whatpm::_LinkTypeList|.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Reports an error about the token |$value| on this attribute.
  my $report = sub {
    my ($type, $value, $level) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $value,
                       level => $self->{level}->{$level});
  };

  ## Collect the tokens.  A token must not be repeated, except |up|.
  my %word;
  for my $token (grep {length $_}
                 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $report->('duplicate token', $token, 'must');
    }
  }
  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  require Whatpm::_LinkTypeList;
  our $LinkType;

  my ($is_hyperlink, $is_resource);
  for my $token (keys %word) {
    my $def = $LinkType->{$token};
    unless (defined $def) {
      $report->('unknown link type', $token, 'uncertain');
      next;
    }

    my $status = $def->{status};
    ## What the link type does in this context, if it is allowed here.
    my $effect = $def->{effect}->[$a_or_area];

    if ($status eq 'accepted' or $status eq 'proposal') {
      $report->('link type:proposed', $token, 'should')
          if $status eq 'proposal';
      $report->('link type:bad context', $token, 'must')
          unless defined $effect;
    } else { # rejected or synonym
      $report->('link type:non-conforming', $token, 'must');
    }

    ## A "unique" link type may be used only once per checker run.
    if ($def->{unique}) {
      if ($self->{has_link_type}->{$token}) {
        $report->('link type:duplicate', $token, 'must');
      } else {
        $self->{has_link_type}->{$token} = 1;
      }
    }

    ## |alternate| is decided after the loop, as it depends on
    ## whether |stylesheet| is also specified.
    if (defined $effect and $token ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback link,
  ## which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
493 wakaba 1.20
494     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
495 wakaba 1.1
## URI (or IRI)
##
## Checks the attribute value as an IRI reference and records the
## attribute in |$element_state->{uri_info}| and in the list of URIs
## at |$self->{return}->{uri}|, keyed by the attribute value.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $value = $attr->value;

  ## The error level table has to be the third argument of
  ## |check_iri_reference|, as in $HTMLSpaceURIsAttrChecker.  It used
  ## to be written after the closing parenthesis, where it was
  ## evaluated and discarded.
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  ## The same hash is stored in both places, so that information added
  ## to |uri_info| later is also visible through |{return}->{uri}|.
  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
512    
## A space separated list of one or more URIs (or IRIs)
##
## Each item is checked as an IRI reference and appended to the list
## of URIs at |$self->{return}->{uri}|, with a type derived from the
## attribute name.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  my %type_of = (
    ping => 'action',
    profile => 'namespace',
    archive => 'resource',
  );
  my $type = $type_of{$attr->name};

  my $index = 0; # position of the item, reported with errors
  for my $uri (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    my $onerror = sub {
      $self->{onerror}->(value => $uri, @_, node => $attr, index => $index);
    };
    Whatpm::URIChecker->check_iri_reference ($uri, $onerror, $self->{level});

    ## TODO: absolute
    my $info = {node => $attr, type => {$type => 1}};
    push @{$self->{return}->{uri}->{$uri} ||= []}, $info;
  } continue {
    $index++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## ISSUE: What is "space"?
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
541    
## Pattern for an e-mail address: two dot-separated sequences of
## |atext| characters joined by "@".  The pattern is not anchored.
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/;
  qr/$dot_atom\@$dot_atom/;
};
548    
## Valid global date and time (and other date/time formats).
##
## |$type| selects the |Message::Date| parser method, |parse_$type|.
## The returned checker stores the parsed value in
## |$element_state->{date_value}->{attribute name}|.  When parsing
## yields a false value, the empty string is stored instead if an
## error has been reported, and |undef| otherwise.
my $GetDateTimeAttrChecker = sub ($) {
  my $type = shift;
  my $method = 'parse_' . $type;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    my $fallback; # becomes '' once an error has been reported

    require Message::Date;
    my $parser = Message::Date->new;
    $parser->{level} = $self->{level};
    $parser->{onerror} = sub {
      my %opt = @_;
      ## "Not supported" values are not forwarded to the caller.
      if ($opt{type} ne 'date value not supported') {
        $self->{onerror}->(%opt, node => $attr);
        $fallback = '';
      }
    };

    my $parsed = $parser->$method ($attr->value);
    $element_state->{date_value}->{$attr->name} = $parsed || $fallback;
  };
}; # $GetDateTimeAttrChecker
573 wakaba 1.1
## Valid integer: an optional "-" followed by one or more ASCII
## digits.  Anything else is an |integer:syntax error| at the |must|
## level.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A-?[0-9]+\z/) {
    $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLIntegerAttrChecker
582    
## Valid non-negative integer.
##
## |$range_check| is called with the numeric value and returns true if
## the value is in range.  The returned checker reports
## |nninteger:syntax error| for anything other than ASCII digits and
## |nninteger:out of range| for a rejected number, both at the |must|
## level.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;

    my $error_type;
    if ($value !~ /\A[0-9]+\z/) {
      $error_type = 'nninteger:syntax error';
    } elsif (not $range_check->($value + 0)) {
      $error_type = 'nninteger:out of range';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $attr,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
600    
## Valid floating point number.
##
## |$range_check| is called with the numeric value and returns true if
## the value is in range; an accepted number is stored in
## |$element_state->{number_value}->{attribute name}|.  Errors are
## |float:syntax error| and |float:out of range|, both at the |must|
## level.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;

    ## Either digits with an optional fraction, or a bare fraction.
    my $is_number = ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                     $value =~ /\A-?\.[0-9]+\z/);

    my $error_type;
    if (not $is_number) {
      $error_type = 'float:syntax error';
    } elsif ($range_check->($value + 0)) {
      ## TODO: parse algorithm
      $element_state->{number_value}->{$attr->name} = $value + 0;
    } else {
      $error_type = 'float:out of range';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $attr,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  };

  ## TODO: scientific notation
}; # $GetHTMLFloatingPointNumberAttrChecker
624    
## |step| attribute.
##
## NOTE: A valid floating point number (> 0), or ASCII
## case-insensitive "any".
my $StepAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;

  my $is_number = ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                   $value =~ /\A-?\.[0-9]+\z/);

  my $error_type;
  if ($is_number) {
    $error_type = 'float:out of range' unless $value > 0;
  } elsif ($value !~ /\A[Aa][Nn][Yy]\z/) {
    $error_type = 'float:syntax error';
  }

  if (defined $error_type) {
    $self->{onerror}->(node => $attr,
                       type => $error_type,
                       level => $self->{level}->{must});
  }

  ## TODO: scientific
}; # $StepAttrChecker
647    
## HTML4 %Length;
##
## Accepts one or more ASCII digits optionally followed by "%";
## anything else is a |length:syntax error| at the |must| level.
##
## NOTE: HTML4 definition is too vague - it does not define the syntax
## of percentage value at all (!).
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A[0-9]+%?\z/) {
    $self->{onerror}->(node => $attr, type => 'length:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLLengthAttrChecker
660    
## A |token| as used in MIME header field parameters.
my $MIMEToken = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
## A type or subtype name (RFC 4288), 1 to 127 characters long.
my $TypeOrSubtype = qr/[A-Za-z0-9!#\$&.+^_-]{1,127}/; # RFC 4288
## |type/subtype| without parameters; captures the two names.
my $IMTNoParameter = qr[($TypeOrSubtype)/($TypeOrSubtype)];

## "A valid MIME type, optionally with parameters. [RFC 2046]"
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
##
## The value is split into type, subtype and parameter name/value
## pairs, which are handed to |Whatpm::IMTChecker|.  A value that does
## not match the syntax is an |IMT:syntax error| at the |must| level.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;

  my ($type, $subtype, $param) = $value =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#;

  if (not defined $type) {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                       level => $self->{level}->{must});
  } else {
    my @type = ($type, $subtype);

    ## Consume the parameters one by one from the front.
    while ($param =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
      my ($name, $token, $quoted) = ($1, $2, $3);
      if (defined $token) {
        push @type, $name => $token;
      } else {
        ## Resolve the quoted pairs, then drop the enclosing quotes.
        $quoted =~ s/\\(.)/$1/gs;
        push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
      }
    }

    require Whatpm::IMTChecker;
    my $ic = Whatpm::IMTChecker->new;
    $ic->{level} = $self->{level};
    $ic->check_imt (sub {
      $self->{onerror}->(@_, node => $attr);
    }, @type);
  }
}; # $HTMLIMTAttrChecker
701    
## Language tag, checked as an RFC 3066 language tag by
## |Whatpm::LangTag|.  Errors are forwarded with the attribute as
## their |node|.
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  my $onerror = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag
      ($attr->value, $onerror, $self->{level});
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
715    
## "A valid media query [MQ]"
##
## The value is not examined; the attribute is always reported as
## |media query| at the |uncertain| level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr,
                type => 'media query',
                level => $self->{level}->{uncertain});
  $self->{onerror}->(@report);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
724    
## Event handler content attribute.
##
## The value is not examined; the attribute is always reported as
## |event handler| at the |uncertain| level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr,
                type => 'event handler',
                level => $self->{level}->{uncertain});
  $self->{onerror}->(@report);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
736    
## |form| attribute.
##
## NOTE: MUST be the ID of a |form| element.  The reference is only
## queued in |$self->{idref}| here, as |[element name, ID, attribute]|.
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;

  my $idref = ['form', $attr->value => $attr];
  push @{$self->{idref}}, $idref;

  ## ISSUE: <form id=""><input form=""> (empty ID)?
}; # $HTMLFormAttrChecker
747    
## |list| attribute.
##
## NOTE: MUST be the ID of a |datalist| element.  The reference is
## only queued in |$self->{idref}| here, as
## |[element name, ID, attribute]|.
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;

  my $idref = ['datalist', $attr->value, $attr];
  push @{$self->{idref}}, $idref;

  ## TODO: Warn violation to control-dependent restrictions.  For
  ## example, |<input type=url maxlength=10 list=a> <datalist
  ## id=a><option value=nonurlandtoolong></datalist>| should be
  ## warned.
}; # $ListAttrChecker
760    
## |pattern| attribute.
##
## The value is handed to |$self->{onsubdoc}| as a character string of
## media type |text/x-regexp-js|, with the attribute as its container
## node.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;
  my $subdoc = {
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-regexp-js',
    is_char_string => 1,
  };
  $self->{onsubdoc}->($subdoc);

  ## ISSUE: "value must match the Pattern production of ECMA 262's
  ## grammar" - no additional constraints (e.g. {n,m} then n>=m).

  ## TODO: Warn if @value does not match @pattern.
}; # $PatternAttrChecker
773    
## Checker for the |accept| attribute.
##
## The value is ASCII-lowercased and split on commas (an empty value
## yields a single empty token).  For each token:
##   - a repeated token is reported as |duplicate token|;
##   - |audio/*|, |video/*| and |image/*| are accepted as is;
##   - a token matching |$IMTNoParameter| is passed to
##     |Whatpm::IMTChecker|, whose errors are forwarded with the
##     attribute as node;
##   - anything else is reported as |IMTnp:syntax error|.
my $AcceptAttrChecker = sub {
  my ($self, $attr) = @_;

  my $value = $attr->value;
  $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
  my @tokens = length $value ? split (/,/, $value, -1) : ('');

  my %is_wildcard = map { $_ => 1 } ('audio/*', 'video/*', 'image/*');
  my %seen;
  TOKEN: foreach my $token (@tokens) {
    if ($seen{$token}++) {
      $self->{onerror}->(node => $attr,
                         type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
      next TOKEN;
    }

    next TOKEN if $is_wildcard{$token};

    unless ($token =~ m[\A$IMTNoParameter\z]) {
      $self->{onerror}->(node => $attr,
                         type => 'IMTnp:syntax error', ## TODOC: type
                         value => $token,
                         level => $self->{level}->{must});
      next TOKEN;
    }

    ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
    ## define its own syntax citing RFC 4288.

    ## NOTE: Parameters not allowed.
    require Whatpm::IMTChecker;
    my $imt_checker = Whatpm::IMTChecker->new;
    $imt_checker->{level} = $self->{level};
    my $forward_error = sub {
      $self->{onerror}->(@_, node => $attr);
    };
    ## |$1| and |$2| are the groups captured by |$IMTNoParameter| above.
    $imt_checker->check_imt ($forward_error, $1, $2);
  }
}; # $AcceptAttrChecker
812    
## Checker for the |name| attribute of form controls: the only
## requirement enforced is that the value is not the empty string.
my $FormControlNameAttrChecker = sub {
  my ($checker, $name_attr) = @_;

  ## NOTE: No uniqueness constraint.
  if (length $name_attr->value) {
    #
  } else {
    $checker->{onerror}->(node => $name_attr,
                          type => 'empty control name', ## TODOC: type
                          level => $checker->{level}->{must});
  }
}; # $FormControlNameAttrChecker
824    
## Checker for the |autofocus| attribute.
##
## The value is first validated as a boolean attribute.  The checker
## then remembers, in |$self->{has_autofocus}|, that an |autofocus|
## attribute has been seen, and reports |duplicate autofocus| for
## every later one.
my $AutofocusAttrChecker = sub {
  my ($checker, $autofocus_attr) = @_;

  my $boolean_checker = $GetHTMLBooleanAttrChecker->('autofocus');
  $boolean_checker->(@_);

  $checker->{onerror}->(node => $autofocus_attr,
                        type => 'duplicate autofocus', ## TODOC: type
                        level => $checker->{level}->{must})
      if $checker->{has_autofocus};
  $checker->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
837    
## Checker for the |usemap| attribute.
##
## A value starting with |#| is recorded on |$self->{usemap}| as
## |[name without the leading "#", attribute node]|.  Any other
## value is reported as |hashref:syntax error|.
my $HTMLUsemapAttrChecker = sub {
  my ($checker, $usemap_attr) = @_;

  ## MUST be a valid hash-name reference to a |map| element.
  my $hashref = $usemap_attr->value;
  if (index ($hashref, '#') == 0) {
    ## NOTE: |usemap="#"| is conforming, though it identifies no |map|
    ## element according to the "rules for parsing a hash-name
    ## reference" algorithm.  The document is non-conforming anyway,
    ## since |<map name="">| (empty name) is non-conforming.
    my $map_name = substr ($hashref, 1);
    push @{$checker->{usemap}}, [$map_name => $usemap_attr];
  } else {
    $checker->{onerror}->(node => $usemap_attr,
                          type => 'hashref:syntax error',
                          level => $checker->{level}->{must});
  }

  ## NOTE: Space characters in hash-name references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported
}; # $HTMLUsemapAttrChecker
855    
## Checker for a "valid browsing context name".
##
## Reports |window name:empty| for the empty string and
## |window name:reserved| (with the value) for any value starting
## with an underscore.  Every other value is accepted.
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($checker, $name_attr) = @_;
  my $name = $name_attr->value;

  unless (length $name) {
    $checker->{onerror}->(node => $name_attr, type => 'window name:empty',
                          level => $checker->{level}->{must});
  } elsif ($name =~ /^_/) {
    $checker->{onerror}->(node => $name_attr, type => 'window name:reserved',
                          level => $checker->{level}->{must},
                          value => $name);
  }
}; # $HTMLBrowsingContextNameAttrChecker
871    
## Checker for a "valid browsing context name or keyword".
##
## Accepts any non-empty value that does not start with an underscore,
## plus the keywords |_blank|, |_self|, |_parent| and |_top| compared
## ASCII case-insensitively.  Reports |window name:reserved| (with the
## ASCII-lowercased value) for any other underscore-prefixed value and
## |window name:empty| for the empty string.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    ## Keywords are matched ASCII case-insensitively.  |lc| is not used
    ## because it applies Unicode case mapping: U+212A KELVIN SIGN
    ## lowercases to "k", which would let |_blan\x{212A}| pass as
    ## |_blank|.  |tr/A-Z/a-z/| is what the other checkers in this
    ## file use for ASCII case folding.
    $value =~ tr/A-Z/a-z/;
    unless ({
      _blank => 1, _self => 1, _parent => 1, _top => 1,
    }->{$value}) {
      $self->{onerror}->(node => $attr,
                         type => 'window name:reserved',
                         level => $self->{level}->{must},
                         value => $value);
    }
  } elsif (length $value) {
    #
  } else {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  }
}; # $HTMLTargetAttrChecker
893    
## Checker for attributes whose value is a group of selectors.
##
## The value is parsed with |Whatpm::CSS::SelectorsParser|, configured
## with the pseudo-classes and pseudo-elements listed below and with
## this checker's error levels.  Parser errors are forwarded to
## |onerror| with the attribute as node.
my $HTMLSelectorsAttrChecker = sub {
  my ($checker, $selectors_attr) = @_;

  ## ISSUE: Namespace resolution?

  my $selectors = $selectors_attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  foreach my $name (@pseudo_classes) {
    $parser->{pseudo_class}->{$name} = 1;
  }

  my @pseudo_elements = qw/
    after before first-letter first-line
  /;
  foreach my $name (@pseudo_elements) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{level} = $checker->{level};
  $parser->{onerror} = sub {
    $checker->{onerror}->(@_, node => $selectors_attr);
  };
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
921    
## Checker for the |accesskey| attribute: the value has to be exactly
## one character long, otherwise |char:syntax error| is reported at
## the |html4_fact| level.
my $HTMLAccesskeyAttrChecker = sub {
  my ($checker, $accesskey_attr) = @_;

  ## NOTE: "character" or |%Character;| in HTML4.

  my $key = $accesskey_attr->value;
  unless (length ($key) == 1) {
    $checker->{onerror}->(node => $accesskey_attr,
                          type => 'char:syntax error',
                          level => $checker->{level}->{html4_fact});
  }

  ## NOTE: "Note. Authors should consider the input method of the
  ## expected reader when specifying an accesskey." [HTML4] This is
  ## hard to implement, since it depends on keyboard and so on.
  ## NOTE: "We recommend that authors include the access key in label
  ## text or wherever the access key is to apply." [HTML4]
  ## (informative)
}; # $HTMLAccesskeyAttrChecker
939    
## Checks one character encoding name.
##
## Arguments: the charset name, the checker object, the attribute node
## used as error location, and an optional flag requesting that the
## encoding be ASCII-compatible.
##
## Returns the list |($charset, $charset_value)|: the entry found in
## |$Message::Charset::Info::IANACharset| for the name (or |undef|)
## and the ASCII-lowercased name.
##
## NOTE: This code is used for |charset=""| attributes, |charset=|
## portion of the |content=""| attributes, and |accept-charset=""|
## attributes.
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($charset_value, $self, $attr, $ascii_compat) = @_;

  ## NOTE: Though the case-sensitivity of |charset| attribute value
  ## is not explicitly spelled in the HTML5 spec, the Character Set
  ## registry of IANA, which is referenced from HTML5 spec, says that
  ## charset name is case-insensitive.
  $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

  ## Every diagnostic below shares the node and the lowercased value.
  my $report = sub {
    my ($type, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $charset_value,
                       level => $self->{level}->{$level_name});
  };

  require Message::Charset::Info;
  my $charset = $Message::Charset::Info::IANACharset->{$charset_value};

  ## ISSUE: What is "valid character encoding name"?  Syntactically
  ## valid?  Syntactically valid and registered?  What about x-charset
  ## names?
  $report->('charset:syntax error', 'must')
      unless Message::Charset::Info::is_syntactically_valid_iana_charset_name
          ($charset_value);

  ## Diagnostic used for a name that is not a registered one.
  my $unregistered_type = $charset_value =~ /^x-/
      ? 'charset:private' : 'charset:not registered';

  unless ($charset) {
    ## TODO: non-preferred-name error for these cases.
    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    $report->($unregistered_type, 'good');
    return ($charset, $charset_value);
  }

  ## ISSUE: What is "the preferred name for that encoding" (for a
  ## charset with no "preferred MIME name" label)?
  my $name_status = $charset->{iana_names}->{$charset_value} || 0;

  my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
  $report->('charset:not preferred', 'must')
      unless ($name_status & $preferred) == $preferred;

  my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();
  $report->($unregistered_type, 'good')
      unless ($name_status & $registered) == $registered;

  if ($ascii_compat) {
    my $compat = Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ();
    $report->('charset:not ascii compat', 'must')
        unless $charset->{category} & $compat;
  }

  return ($charset, $charset_value);
}; # $HTMLCharsetChecker
1028    
## Checker for attributes holding a list of character encoding names.
##
## NOTE: "An ordered set of space-separated tokens" where "each token
## MUST be the preferred name of an ASCII-compatible character
## encoding".
##
## The value is split on runs of TAB, LF, FF, CR and SPACE, and every
## non-empty token is passed to |$HTMLCharsetChecker| with the
## ASCII-compatibility check enabled.
my $HTMLCharsetsAttrChecker = sub {
  my ($checker, $charsets_attr) = @_;

  ## ISSUE: "ordered set of space-separated tokens" is not defined.

  ## ISSUE: Uniqueness is not enforced.

  foreach my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $charsets_attr->value) {
    next unless length $token;
    $HTMLCharsetChecker->($token, $checker, $charsets_attr, 1);
  }

  ## ISSUE: Shift_JIS is ASCII-compatible?  What about ISO-2022-JP?
}; # $HTMLCharsetsAttrChecker
1047    
## Checker for HTML4 color values.
##
## Accepts |#| followed by one or more hexadecimal digits, or one of
## the sixteen color names below, compared case-insensitively.
## Anything else is reported as |color:syntax error| at the
## |html4_fact| level.
my $HTMLColorAttrChecker = sub {
  my ($checker, $color_attr) = @_;

  ## NOTE: HTML4 "color" or |%Color;|

  my $color_names = join '|', qw/
    black silver gray white maroon red purple fuchsia
    green lime olive yellow navy blue teal aqua
  /;

  my $color = $color_attr->value;
  unless ($color =~ /\A(?>#[0-9A-F]+|$color_names)\z/i) {
    $checker->{onerror}->(node => $color_attr, type => 'color:syntax error',
                          level => $checker->{level}->{html4_fact});
  }

  ## TODO: HTML4 has some guideline on usage of color.
}; # $HTMLColorAttrChecker
1062    
## Checker shared by the |ref| and |template| attributes.
##
## Steps:
##   1. The value is checked as a URI by |$HTMLURIAttrChecker|.
##   2. A |ref| attribute on an element without a |template| attribute
##      is reported as |attribute not allowed|.
##   3. The value is resolved against the document URI.  If it points
##      into the same document, a fragment identifier is queued on
##      |$self->{ref}| or |$self->{template}|.  A |template| attribute
##      without fragment identifier requires the root element to be an
##      HTML |datatemplate| element, otherwise |template:not template|
##      is reported.
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($self, $attr) = @_;
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $attr->name;

  if ($attr_name eq 'ref' and
      not $attr->owner_element->has_attribute_ns (undef, 'template')) {
    $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                       level => $self->{level}->{must});
  }

  require Message::URL;
  my $doc = $attr->owner_document;
  my $doc_uri = $doc->document_uri;
  my $uri = Message::URL->new_abs ($attr->value, $doc_uri);
  my $no_frag_uri = $uri->clone;
  $no_frag_uri->uri_fragment (undef);

  my $is_same_document = defined $doc_uri
      ? $doc_uri eq $no_frag_uri
      : $no_frag_uri eq '';

  if ($is_same_document) {
    my $fragid = $uri->uri_fragment;
    if (defined $fragid) {
      push @{$self->{$attr_name}}, [$fragid => $attr];
    } elsif ($attr_name eq 'template') {
      ## The document itself is referenced, so its root element has to
      ## be an HTML |datatemplate| element.
      my $root_is_datatemplate = 0;
      my $docel = $doc->document_element;
      if ($docel) {
        my $nsuri = $docel->namespace_uri;
        $root_is_datatemplate = 1
            if defined $nsuri and $nsuri eq $HTML_NS and
               $docel->manakai_local_name eq 'datatemplate';
      }

      unless ($root_is_datatemplate) {
        $self->{onerror}->(node => $attr, type => 'template:not template',
                           level => $self->{level}->{must});
      }
    }
  } else {
    ## TODO: An external document is referenced.
    ## The document MUST be an HTML or XML document.
    ## If there is a fragment identifier, it MUST point a part of the doc.
    ## If the attribute is |template|, the pointed part MUST be a
    ## |datatemplate| element.
    ## If no fragment identifier is specified, the root element MUST be
    ## a |datatemplate| element when the attribute is |template|.
  }
}; # $HTMLRefOrTemplateAttrChecker
1115    
## Checker for the |repeat-min|, |repeat-max| and |repeat-start|
## attributes.
##
## A namespaced form of the attribute is reported as |attribute not
## allowed| when its owner element is in the HTML namespace.  The
## value is then checked as a non-negative integer with no further
## range restriction.
my $HTMLRepeatIndexAttrChecker = sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## Both conditions have to hold.  The previous |or| flagged every
    ## owner element that has any namespace and compared |undef| with
    ## |eq| for elements in no namespace.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1130    
## Checkers for the attributes usable on every HTML element, keyed by
## attribute local name.  Each checker is called as
## |$checker->($self, $attr, $item, $element_state)| and reports
## problems through |$self->{onerror}|.
my $HTMLAttrChecker = {
  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]

  ## |id|: non-empty, free of space characters, unique in the document.
  ## Every occurrence is recorded in |$self->{id}|; the first one also
  ## sets |$self->{id_type}|.
  id => sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    if (length $value > 0) {
      if ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID',
                           level => $self->{level}->{must});
        push @{$self->{id}->{$value}}, $attr;
      } else {
        $self->{id}->{$value} = [$attr];
        $self->{id_type}->{$value} = $element_state->{id_type} || '';
      }
      if ($value =~ /[\x09\x0A\x0C\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID',
                           level => $self->{level}->{must});
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value',
                         level => $self->{level}->{must});
    }
  },

  title => sub {}, ## NOTE: No conformance criteria

  ## |lang|: empty, or checked as an RFC 3066 language tag.
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value eq '') {
      #
    } else {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        $self->{onerror}->(@_, node => $attr);
      }, $self->{level});
    }
    ## ISSUE: RFC 4646 (3066bis)?

    ## TODO: test data

    ## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
    ## non-conforming.  Such errors are detected by the checkers of
    ## |{}xml:lang| and |{xml}:lang| attributes.
  },

  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),

  ## |class|: tokens must be unique.  Each distinct token is recorded
  ## in |$self->{return}->{class}|.
  class => sub {
    my ($self, $attr) = @_;

    ## NOTE: "Unordered set of unique space-separated tokens".

    my %word;
    for my $word (grep {length $_}
                  split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      unless ($word{$word}) {
        $word{$word} = 1;
        push @{$self->{return}->{class}->{$word}||=[]}, $attr;
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $word,
                           level => $self->{level}->{must});
      }
    }
  },

  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),

  ## |contextmenu|: queued as a reference to a |menu| element.
  contextmenu => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    push @{$self->{idref}}, ['menu', $value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },

  hidden => $GetHTMLBooleanAttrChecker->('hidden'),
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ref => $HTMLRefOrTemplateAttrChecker,

  ## |registrationmark|: reported as |attribute not allowed| outside
  ## of a |rule| element, and inside one on an HTML |nest| element or
  ## on an HTML |rule| element whose |in_rule_original| state is false.
  registrationmark => sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## NOTE: Any value is conforming.

    if ($self->{flag}->{in_rule}) {
      my $el = $attr->owner_element;
      my $ln = $el->manakai_local_name;
      if ($ln eq 'nest' or
          ($ln eq 'rule' and not $element_state->{in_rule_original})) {
        my $nsuri = $el->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    } else {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## |repeat|: the keyword |template| or an integer.
  repeat => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## The namespaced attribute is only disallowed on elements in
      ## the HTML namespace, so both conditions have to hold.  The
      ## previous |or| flagged every namespaced owner element and
      ## compared |undef| with |eq| otherwise.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    my $value = $attr->value;
    if ($value eq 'template') {
      #
    } elsif ($value =~ /\A-?[0-9]+\z/) {
      #
    } else {
      $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "Repetition templates may occur anywhere."  Does that mean
    ## that the attribute MAY be specified to any element, or that the
    ## element with that attribute (i.e. a repetition template) can be
    ## inserted anywhere in a document tree?
  },
  'repeat-min' => $HTMLRepeatIndexAttrChecker,
  'repeat-max' => $HTMLRepeatIndexAttrChecker,
  'repeat-start' => $HTMLRepeatIndexAttrChecker,

  ## |repeat-template|: only the namespace restriction is checked.
  'repeat-template' => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## Same restriction as for |repeat|: an error only when the owner
      ## element is in the HTML namespace (|and|, not |or|).
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## ISSUE: This attribute has no conformance requirement.
    ## ISSUE: Repetition blocks MAY have this attribute.  Then, is the
    ## attribute allowed on an element that is not a repetition block?
  },

  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]

  ## |style|: handed to |onsubdoc| as inline CSS.
  style => sub {
    my ($self, $attr) = @_;

    $self->{onsubdoc}->({s => $attr->value,
                         container_node => $attr,
                         media_type => 'text/x-css-inline',
                         is_char_string => 1});

    ## NOTE: "... MUST still be comprehensible and usable if those
    ## attributes were removed" is a semantic requirement, it cannot
    ## be tested.
  },

  tabindex => $HTMLIntegerAttrChecker,
  template => $HTMLRefOrTemplateAttrChecker,

  ## |xml:lang| in no namespace: always reported (as information in
  ## HTML documents, at the |html5_no_may| level otherwise).  It also
  ## has to be accompanied by a |lang| attribute whose value is the
  ## same after ASCII case folding.
  'xml:lang' => sub {
    my ($self, $attr) = @_;

    if ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(type => 'in HTML:xml:lang',
                         level => $self->{level}->{info},
                         node => $attr);
      ## NOTE: This is not an error, but the attribute will be ignored.
    } else {
      $self->{onerror}->(type => 'in XML:xml:lang',
                         level => $self->{level}->{html5_no_may},
                         node => $attr);
      ## TODO: We need to add test for this error.
    }

    my $lang_attr = $attr->owner_element->get_attribute_node_ns
        (undef, 'lang');
    if ($lang_attr) {
      my $lang_attr_value = $lang_attr->value;
      $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      if ($lang_attr_value ne $value) {
        $self->{onerror}->(type => 'xml:lang ne lang',
                           level => $self->{level}->{must},
                           node => $attr);
      }
    } else {
      $self->{onerror}->(type => 'xml:lang not allowed',
                         level => $self->{level}->{must},
                         node => $attr);
      ## TODO: We need to add test for <x {xml}:lang {}xml:lang>.
    }
  },

  ## |xmlns| in no namespace: the value must be the HTML namespace
  ## URI, and the attribute is reported in non-HTML documents.  The
  ## value is recorded in |$self->{return}->{uri}| as a namespace URI.
  xmlns => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                         level => $self->{level}->{must});
      ## TODO: Should be new "bad namespace" error?
    }
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                         level => $self->{level}->{must});
      ## TODO: Test
    }

    ## TODO: Should be resolved?
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {namespace => 1}};
  },
};
1344    
## ISSUE: Shouldn't the same-origin policy be applied to the
## datatemplate feature?
1346    
## Feature status of the global attributes, keyed by attribute name.
## |role| has status |0|.
my %HTMLAttrStatus = (
  class             => FEATURE_HTML5_WD,
  contenteditable   => FEATURE_HTML5_DEFAULT,
  contextmenu       => FEATURE_HTML5_WD,
  dir               => FEATURE_HTML5_WD,
  draggable         => FEATURE_HTML5_LC,
  hidden            => FEATURE_HTML5_DEFAULT,
  id                => FEATURE_HTML5_WD,
  irrelevant        => FEATURE_HTML5_DROPPED,
  lang              => FEATURE_HTML5_WD,
  ref               => FEATURE_HTML5_AT_RISK,
  registrationmark  => FEATURE_HTML5_AT_RISK,
  repeat            => FEATURE_WF2,
  'repeat-max'      => FEATURE_WF2,
  'repeat-min'      => FEATURE_WF2,
  'repeat-start'    => FEATURE_WF2,
  'repeat-template' => FEATURE_WF2,
  role              => 0,
  style             => FEATURE_HTML5_WD,
  tabindex          => FEATURE_HTML5_DEFAULT,
  template          => FEATURE_HTML5_AT_RISK,
  title             => FEATURE_HTML5_WD,
  xmlns             => FEATURE_HTML5_WD,
);
1371    
## Feature status of the common attributes, combining the HTML5
## status flags with the |FEATURE_M12N10_REC| and |FEATURE_RDFA_*|
## flags.
my %HTMLM12NCommonAttrStatus = (
  about       => FEATURE_RDFA_REC,
  class       => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  content     => FEATURE_RDFA_REC,
  datatype    => FEATURE_RDFA_REC,
  dir         => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  href        => FEATURE_RDFA_REC,
  id          => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  instanceof  => FEATURE_RDFA_LC_DROPPED,
  onclick     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  ondblclick  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmouseup   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmouseout  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onkeypress  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onkeydown   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onkeyup     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  property    => FEATURE_RDFA_REC,
  rel         => FEATURE_RDFA_REC,
  resource    => FEATURE_RDFA_REC,
  rev         => FEATURE_RDFA_REC,
  #style      => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #               FEATURE_M12N10_REC,
  style       => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
                 FEATURE_M12N10_REC,
  title       => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  typeof      => FEATURE_RDFA_REC,
);
1402    
## Feature status of the XHTML2 common attributes, grouped by the
## attribute collections named in the section comments.  All entries
## carry |FEATURE_XHTML2_ED|; some also carry |FEATURE_HTML5_WD|.
my %XHTML2CommonAttrStatus = (
  ## Core
  class      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
  id         => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
  #xml:id
  layout     => FEATURE_XHTML2_ED,
  title      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,

  ## Hypertext
  cite       => FEATURE_XHTML2_ED,
  href       => FEATURE_XHTML2_ED,
  hreflang   => FEATURE_XHTML2_ED,
  hrefmedia  => FEATURE_XHTML2_ED,
  hreftype   => FEATURE_XHTML2_ED,
  nextfocus  => FEATURE_XHTML2_ED,
  prevfocus  => FEATURE_XHTML2_ED,
  target     => FEATURE_XHTML2_ED,
  #xml:base

  ## I18N
  #xml:lang

  ## Bi-directional
  dir        => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,

  ## Edit
  edit       => FEATURE_XHTML2_ED,
  datetime   => FEATURE_XHTML2_ED,

  ## Embedding
  encoding   => FEATURE_XHTML2_ED,
  src        => FEATURE_XHTML2_ED,
  srctype    => FEATURE_XHTML2_ED,

  ## Image Map
  usemap     => FEATURE_XHTML2_ED,
  ismap      => FEATURE_XHTML2_ED,
  shape      => FEATURE_XHTML2_ED,
  coords     => FEATURE_XHTML2_ED,

  ## Media
  media      => FEATURE_XHTML2_ED,

  ## Metadata
  about      => FEATURE_XHTML2_ED,
  content    => FEATURE_XHTML2_ED,
  datatype   => FEATURE_XHTML2_ED,
  instanceof => FEATURE_XHTML2_ED,
  property   => FEATURE_XHTML2_ED,
  rel        => FEATURE_XHTML2_ED,
  resource   => FEATURE_XHTML2_ED,
  rev        => FEATURE_XHTML2_ED,

  ## Role
  role       => FEATURE_XHTML2_ED,

  ## Style
  style      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED, # "strongly discouraged"
);
1462    
## Union of |%HTMLM12NCommonAttrStatus| and |%XHTML2CommonAttrStatus|.
## The two tables are copied in first; the entries listed after them
## override the copied values.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  about      => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  class      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  content    => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  datatype   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  dir        => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  href       => FEATURE_RDFA_REC,
  id         => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  instanceof => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,
  property   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  rel        => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  resource   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  rev        => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  #style     => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #              FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  style      => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
                FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  title      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  typeof     => FEATURE_RDFA_REC,
);
1486    
## Register the event handler content attributes as global attributes:
## each one gets |$HTMLEventHandlerAttrChecker| as its checker and
## |FEATURE_HTML5_DEFAULT| as its status.
foreach my $handler_name (qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onstorage onsubmit onunload
/) {
  $HTMLAttrStatus{$handler_name} = FEATURE_HTML5_DEFAULT;
  $HTMLAttrChecker->{$handler_name} = $HTMLEventHandlerAttrChecker;
}

## Event handler attributes with the |FEATURE_HTML5_DROPPED| status;
## they are checked the same way.
foreach my $handler_name (qw/
  ondataunavailable
/) {
  $HTMLAttrStatus{$handler_name} = FEATURE_HTML5_DROPPED;
  $HTMLAttrChecker->{$handler_name} = $HTMLEventHandlerAttrChecker;
}
1505    
## NOTE: Non-standard global attributes in the HTML namespace.
##
## Registers the checkers and feature statuses used for attributes
## that are themselves in the HTML namespace.  The empty-string key is
## the fallback for names that have no entry of their own.
{
  my $html_checker = $AttrChecker->{$HTML_NS} ||= {};
  my $html_status = $AttrStatus->{$HTML_NS} ||= {};

  $html_checker->{''} = sub {}; # no syntactical checks
  $html_status->{''} = 0; # disallowed and not part of any standard

  $html_status->{active} = FEATURE_HTML5_DROPPED;

  foreach my $name (qw/repeat repeat-max repeat-min repeat-start
                       repeat-template/) {
    $html_checker->{$name} = $HTMLAttrChecker->{$name};
    $html_status->{$name} = FEATURE_WF2;
  }

  foreach my $name (qw/about content datatype property rel resource rev/) {
    $html_status->{$name} = FEATURE_RDFA_REC | FEATURE_XHTML2_ED;
  }
  $html_status->{instanceof} = FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED;
  $html_status->{typeof} = FEATURE_RDFA_REC;
  $html_status->{role} = FEATURE_ROLE_LC;

  foreach my $name (qw/cite coords datetime edit encoding href hreflang
                       hrefmedia hreftype ismap layout media nextfocus
                       prevfocus shape src srctype style target usemap/) {
    $html_status->{$name} = FEATURE_XHTML2_ED;
  }

  foreach my $name (qw/class dir id title/) {
    $html_status->{$name} = FEATURE_M12N11_LC | FEATURE_XHTML2_ED;
  }

  foreach my $name (qw/onclick ondblclick onmousedown onmouseup onmouseover
                       onmousemove onmouseout onkeypress onkeydown onkeyup/) {
    $html_status->{$name} = FEATURE_M12N11_LC;
  }
}
1534    
## Checker for |data-*| attributes.  It performs no check at all.
my $HTMLDatasetAttrChecker = sub {
  ## NOTE: "Authors should ... when the attributes are ignored and
  ## any associated CSS dropped, the page is still usable."  This is
  ## a semantic constraint.
}; # $HTMLDatasetAttrChecker

## Feature status reported for |data-*| attributes.
my $HTMLDatasetAttrStatus = FEATURE_HTML5_WD;
1542 wakaba 1.73
## Builds the |check_attrs| callback for one kind of element.
##
## Arguments: a hash reference of element-specific attribute checkers
## and a hash reference of element-specific attribute statuses, both
## keyed by attribute local name.
##
## The returned code reference takes |($self, $item, $element_state)|
## and, for every attribute of |$item->{node}|:
##   1. selects a checker and a status.  For attributes in no
##      namespace, all-lowercase |data-*| names use the dataset checker
##      and status; other names use the element-specific checker, then
##      |$HTMLAttrChecker|, with the element-specific status.  If
##      either is still false, the per-namespace |$AttrChecker| /
##      |$AttrStatus| tables are consulted, first by local name and
##      then by the empty-string fallback key;
##   2. runs the checker if there is one.  Otherwise it reports
##      |unknown attribute| at the |uncertain| level, except for an
##      attribute in no namespace that has no true element-specific
##      status, where nothing is reported;
##   3. passes the status to |$self->_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;

  return sub {
    my ($self, $item, $element_state) = @_;

    foreach my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;

      my ($checker, $status);
      if ($ns eq '') {
        my $is_dataset_attr = ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                               $ln !~ /[A-Z]/);
        if ($is_dataset_attr) {
          ($checker, $status)
              = ($HTMLDatasetAttrChecker, $HTMLDatasetAttrStatus);
        } else {
          $checker = $element_specific_checker->{$ln}
              || $HTMLAttrChecker->{$ln};
          $status = $element_specific_status->{$ln};
        }
      }

      ## Fall back to the per-namespace tables.
      unless ($checker) {
        $checker = $AttrChecker->{$ns}->{$ln}
            || $AttrChecker->{$ns}->{''};
      }
      unless ($status) {
        $status = $AttrStatus->{$ns}->{$ln}
            || $AttrStatus->{$ns}->{''};
      }
      if (not defined $status and length $ns) {
        $status = FEATURE_ALLOWED;
      }

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($ns eq '' and not $element_specific_status->{$ln}) {
        #
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in
        ## the spec
      }

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1584    
## Base checker definition shared by the HTML element checkers below.
## It extends |%Whatpm::ContentChecker::AnyChecker| with the global
## attribute check and with the URI information for the |template| and
## |ref| attributes.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Mark both attributes as referring to a resource.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
);
1595    
## Checker for elements whose content model is empty: any child
## element (unless listed in |plus_elements|) and any significant
## text is an error.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:empty';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{level}->{must});
  },
);
1623    
## Checker for elements whose content model is text only: child
## elements are an error unless they are listed in |plus_elements|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:text';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $child_el, type => $error_type,
                         level => $self->{level}->{must});
    }
  },
);
1642    
## Checker for elements whose content model is flow content.
##
## |$element_state->{has_non_style}| records that something other than
## a |style| element has been seen; a |style| child is only accepted
## before that point and only with a |scoped| attribute.
my %HTMLFlowContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Reports a MUST-level error of the given type on the child.
    my $report = sub {
      $self->{onerror}->(node => $child_el,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $report->('element not allowed:minus');
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor; nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      my $misplaced = ($element_state->{has_non_style} or
                       not $child_el->has_attribute_ns (undef, 'scoped'));
      $report->('element not allowed:flow style') if $misplaced;
    } elsif ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
      unless ($child_is_transparent) {
        $element_state->{has_non_style} = 1;
      }
    } else {
      $element_state->{has_non_style} = 1;
      $report->('element not allowed:flow');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: A modified copy of the code below is in |datagrid| checker.
    if ($element_state->{has_significant}) {
      ## Propagate the "has significant content" mark to the parent.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
);
1691    
## Checker for elements whose content model is phrasing content.
##
## NOTE: The definition for |li| assumes that the only differences
## between flow and phrasing content checkers are |check_child_element|
## and |check_child_text|.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif (not $HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:phrasing';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  },
  ## The end-of-element check is shared with the flow content checker.
  check_end => $HTMLFlowContentChecker{check_end},
);
1717    
## Transparent elements are checked with a copy of the flow content
## checker.
## ISSUE: Significant content rule should be applied to transparent element
## with parent?
my %HTMLTransparentChecker = (%HTMLFlowContentChecker);

our ($Element, $ElementDefault);

## Default definition (empty local name) for elements in the HTML
## namespace: a plain copy of |%HTMLChecker|.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
1728    
## The |html| element.
##
## The content model is tracked with |$element_state->{phase}|, which
## moves from 'before head' to 'after head' to 'after body' as the
## |head| and |body| children are seen.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    version => sub {
      ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
      ## Though DTDs of various versions of HTML define the attribute
      ## as |#FIXED|, this conformance checker does no check for
      ## the attribute value, since what kind of check should be done
      ## is unknown.
    },
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    manifest => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';

    ## Mark the URI-valued attributes as referring to a resource.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{manifest}->{type}->{resource} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } else {
      ## True if the child is the HTML element with the given local name.
      my $child_is = sub {
        return ($child_nsuri eq $HTML_NS and $child_ln eq $_[0]);
      };
      ## Reports the child as not allowed at this position.
      my $not_allowed = sub {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      };

      my $phase = $element_state->{phase};
      if ($phase eq 'before head') {
        if ($child_is->('head')) {
          $element_state->{phase} = 'after head';
        } elsif ($child_is->('body')) {
          ## |body| without a preceding |head|.
          $self->{onerror}->(node => $child_el,
                             type => 'ps element missing',
                             text => 'head',
                             level => $self->{level}->{must});
          $element_state->{phase} = 'after body';
        } else {
          $not_allowed->();
        }
      } elsif ($phase eq 'after head') {
        if ($child_is->('body')) {
          $element_state->{phase} = 'after body';
        } else {
          $not_allowed->();
        }
      } elsif ($phase eq 'after body') {
        $not_allowed->();
      } else {
        die "check_child_element: Bad |html| phase: $element_state->{phase}";
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must});
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Children that are still missing when the element ends, by phase.
    my %missing_in = (
      'after body' => [],
      'before head' => [qw/head body/],
      'after head' => [qw/body/],
    );
    my $missing = $missing_in{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";
    for my $child_name (@$missing) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => $child_name,
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1833 wakaba 1.25
## The |head| element.
##
## Content model: metadata content, with exactly one |title| child and
## no |style| child carrying a |scoped| attribute.  While the children
## are being checked, |$self->{flag}->{in_head}| is set to true; the
## value it had before is kept in |$element_state->{in_head_original}|
## and restored by |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is allowed.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{level}->{must});
    }

    ## Save the previous value of the |in_head| flag only once.  This
    ## callback runs for every child; saving unconditionally would
    ## overwrite the saved value with the 1 set below as soon as a
    ## second child is seen, and |check_end| would then leave the flag
    ## set after the |head| element has ended.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'title',
                         level => $self->{level}->{must});
    }
    ## Restore the flag to the value it had before this |head|.
    $self->{flag}->{in_head} = $element_state->{in_head_original};

    $HTMLChecker{check_end}->(@_);
  },
};
1908    
## The |title| element: text-only content, global attributes plus the
## statuses listed below.
$Element->{$HTML_NS}->{title} = do {
  my %attr_status = (
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLTextChecker,
    status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
1922 wakaba 1.1
## The |base| element: empty content model.  |check_attrs| additionally
## enforces that there is at most one |base| element, that it precedes
## URL attributes and hyperlink elements as required, and that at least
## one of |href| and |target| is specified.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    ## Reports a MUST-level error of the given type on the element.
    my $report = sub {
      $self->{onerror}->(node => $node,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    ## Only one |base| element is allowed.
    if ($self->{has_base}) {
      $report->('element not allowed:base');
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $node->has_attribute_ns (undef, 'href');
    my $has_target = $node->has_attribute_ns (undef, 'target');

    ## ISSUE: Are these examples conforming?
    ## <head profile="a b c"><base href> (except for |profile|'s
    ## non-conformance)
    ## <title xml:base="relative"/><base href/> (maybe it should be)
    ## <unknown xmlns="relative"/><base href/> (assuming that
    ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
    ## <style>@import 'relative';</style><base href>
    ## <script>location.href = 'relative';</script><base href>
    ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
    ## an exception.
    $report->('basehref after URL attribute')
        if $self->{has_uri_attr} and $has_href;

    ## ISSUE: Are these examples conforming?
    ## <head><title xlink:href=""/><base target="name"/></head>
    ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
    ## (assuming that |xbl:xbl| is allowed before |base|)
    ## NOTE: These are non-conformant anyway because of |head|'s content model:
    ## <link href=""/><base target="name"/>
    ## <link rel=unknown href=""><base target=name>
    $report->('basetarget after hyperlink')
        if $self->{has_hyperlink_element} and $has_target;

    $report->('attribute missing:href|target')
        unless $has_href or $has_target;

    $element_state->{uri_info}->{href}->{type}->{base} = 1;

    my %attr_checker = (
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    );
    my %attr_status = (
      %HTMLAttrStatus,
      href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    );
    return $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status)
        ->($self, $item, $element_state);
  },
};
1987    
## The |link| element: empty content model.  After the per-attribute
## checks, |check_attrs| verifies the required |href| and |rel|
## attributes, that |sizes| is only used with the |icon| link type, and
## that an alternate style sheet has a non-empty |title|.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    ## Set by the |sizes| checker below when the attribute is present.
    my $sizes_attr;

    my %attr_checker = (
      charset => sub {
        my ($self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      sizes => sub {
        my ($self, $attr) = @_;
        $sizes_attr = $attr;

        ## Each token must be unique and either |any| or |WxH|.
        my %seen;
        for my $token (grep {length $_}
                       split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
          if ($seen{$token}) {
            $self->{onerror}->(node => $attr, type => 'duplicate token',
                               value => $token,
                               level => $self->{level}->{must});
            next;
          }
          $seen{$token} = 1;

          next if $token eq 'any' or
              $token =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/;
          $self->{onerror}->(node => $attr,
                             type => 'sizes:syntax error',
                             value => $token,
                             level => $self->{level}->{must});
        }
      },
      target => $HTMLTargetAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    );

    my %attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
          ## NOTE: |charset| attribute had been part of HTML5 spec though
          ## it had been commented out.
      href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      methods => FEATURE_HTML20_RFC,
      rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      sizes => FEATURE_HTML5_LC,
      target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      # title: HTML5_WD | HTML5_LC | ...
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    );

    $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status)
        ->($self, $item, $element_state);

    ## Reports that a required attribute is missing from the element.
    my $attr_missing = sub {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing',
                         text => $_[0],
                         level => $self->{level}->{must});
    };

    if ($node->has_attribute_ns (undef, 'href')) {
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    } else {
      $attr_missing->('href');
    }

    $attr_missing->('rel') unless $node->has_attribute_ns (undef, 'rel');

    ## |sizes| is only allowed together with the |icon| link type.
    if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
      $self->{onerror}->(node => $sizes_attr,
                         type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }

    ## An alternate style sheet needs a non-empty |title|.
    if ($element_state->{link_rel}->{alternate} and
        $element_state->{link_rel}->{stylesheet}) {
      my $title_attr = $node->get_attribute_node_ns (undef, 'title');
      if (not $title_attr) {
        $attr_missing->('title');
      } elsif ($title_attr->value eq '') {
        $self->{onerror}->(node => $title_attr,
                           type => 'empty style sheet title',
                           level => $self->{level}->{must});
      }
    }
  },
};
2092    
2093     $Element->{$HTML_NS}->{meta} = {
2094 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2095 wakaba 1.40 %HTMLEmptyChecker,
2096     check_attrs => sub {
2097     my ($self, $item, $element_state) = @_;
2098 wakaba 1.1 my $name_attr;
2099     my $http_equiv_attr;
2100     my $charset_attr;
2101     my $content_attr;
2102 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2103 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2104     $attr_ns = '' unless defined $attr_ns;
2105     my $attr_ln = $attr->manakai_local_name;
2106     my $checker;
2107 wakaba 1.73 my $status;
2108 wakaba 1.1 if ($attr_ns eq '') {
2109 wakaba 1.73 $status = {
2110     %HTMLAttrStatus,
2111 wakaba 1.82 %XHTML2CommonAttrStatus,
2112 wakaba 1.153 charset => FEATURE_HTML5_WD,
2113     content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2114     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2115     'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2116     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
2117     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2118     name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2119 wakaba 1.73 scheme => FEATURE_M12N10_REC,
2120     }->{$attr_ln};
2121    
2122 wakaba 1.1 if ($attr_ln eq 'content') {
2123     $content_attr = $attr;
2124     $checker = 1;
2125     } elsif ($attr_ln eq 'name') {
2126     $name_attr = $attr;
2127     $checker = 1;
2128     } elsif ($attr_ln eq 'http-equiv') {
2129     $http_equiv_attr = $attr;
2130     $checker = 1;
2131     } elsif ($attr_ln eq 'charset') {
2132     $charset_attr = $attr;
2133     $checker = 1;
2134 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
2135 wakaba 1.71 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2136 wakaba 1.67 $checker = sub {};
2137 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2138     $attr_ln !~ /[A-Z]/) {
2139 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
2140     $status = $HTMLDatasetAttrStatus;
2141 wakaba 1.1 } else {
2142     $checker = $HTMLAttrChecker->{$attr_ln}
2143 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
2144 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
2145     }
2146     } else {
2147     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2148 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
2149     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2150     || $AttrStatus->{$attr_ns}->{''};
2151     $status = FEATURE_ALLOWED if not defined $status;
2152 wakaba 1.1 }
2153 wakaba 1.62
2154 wakaba 1.1 if ($checker) {
2155 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
2156 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
2157 wakaba 1.54 #
2158 wakaba 1.1 } else {
2159 wakaba 1.104 $self->{onerror}->(node => $attr,
2160     type => 'unknown attribute',
2161     level => $self->{level}->{uncertain});
2162 wakaba 1.49 ## ISSUE: No conformance createria for unknown attributes in the spec
2163     }
2164    
2165 wakaba 1.82 $self->_attr_status_info ($attr, $status);
2166 wakaba 1.1 }
2167    
2168     if (defined $name_attr) {
2169     if (defined $http_equiv_attr) {
2170     $self->{onerror}->(node => $http_equiv_attr,
2171 wakaba 1.104 type => 'attribute not allowed',
2172     level => $self->{level}->{must});
2173 wakaba 1.1 } elsif (defined $charset_attr) {
2174     $self->{onerror}->(node => $charset_attr,
2175 wakaba 1.104 type => 'attribute not allowed',
2176     level => $self->{level}->{must});
2177 wakaba 1.1 }
2178     my $metadata_name = $name_attr->value;
2179     my $metadata_value;
2180     if (defined $content_attr) {
2181     $metadata_value = $content_attr->value;
2182     } else {
2183 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2184 wakaba 1.104 type => 'attribute missing',
2185     text => 'content',
2186     level => $self->{level}->{must});
2187 wakaba 1.1 $metadata_value = '';
2188     }
2189     } elsif (defined $http_equiv_attr) {
2190     if (defined $charset_attr) {
2191     $self->{onerror}->(node => $charset_attr,
2192 wakaba 1.104 type => 'attribute not allowed',
2193     level => $self->{level}->{must});
2194 wakaba 1.1 }
2195     unless (defined $content_attr) {
2196 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2197 wakaba 1.104 type => 'attribute missing',
2198     text => 'content',
2199     level => $self->{level}->{must});
2200 wakaba 1.1 }
2201     } elsif (defined $charset_attr) {
2202     if (defined $content_attr) {
2203     $self->{onerror}->(node => $content_attr,
2204 wakaba 1.104 type => 'attribute not allowed',
2205     level => $self->{level}->{must});
2206 wakaba 1.1 }
2207     } else {
2208     if (defined $content_attr) {
2209     $self->{onerror}->(node => $content_attr,
2210 wakaba 1.104 type => 'attribute not allowed',
2211     level => $self->{level}->{must});
2212 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2213 wakaba 1.104 type => 'attribute missing:name|http-equiv',
2214     level => $self->{level}->{must});
2215 wakaba 1.1 } else {
2216 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2217 wakaba 1.104 type => 'attribute missing:name|http-equiv|charset',
2218     level => $self->{level}->{must});
2219 wakaba 1.1 }
2220     }
2221    
2222 wakaba 1.32 my $check_charset_decl = sub () {
2223 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
2224 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2225     for my $el (@{$parent->child_nodes}) {
2226     next unless $el->node_type == 1; # ELEMENT_NODE
2227 wakaba 1.40 unless ($el eq $item->{node}) {
2228 wakaba 1.29 ## NOTE: Not the first child element.
2229 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2230 wakaba 1.32 type => 'element not allowed:meta charset',
2231 wakaba 1.104 level => $self->{level}->{must});
2232 wakaba 1.29 }
2233     last;
2234     ## NOTE: Entity references are not supported.
2235     }
2236     } else {
2237 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2238 wakaba 1.32 type => 'element not allowed:meta charset',
2239 wakaba 1.104 level => $self->{level}->{must});
2240 wakaba 1.29 }
2241    
2242 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
2243     $self->{onerror}->(node => $item->{node},
2244 wakaba 1.32 type => 'in XML:charset',
2245 wakaba 1.104 level => $self->{level}->{must});
2246 wakaba 1.1 }
2247 wakaba 1.32 }; # $check_charset_decl
2248 wakaba 1.21
2249 wakaba 1.32 my $check_charset = sub ($$) {
2250     my ($attr, $charset_value) = @_;
2251 wakaba 1.21
2252 wakaba 1.91 my $charset;
2253     ($charset, $charset_value)
2254     = $HTMLCharsetChecker->($charset_value, $self, $attr);
2255    
2256 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
2257 wakaba 1.21 if (defined $ic) {
2258     ## TODO: Test for this case
2259     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2260     if ($charset ne $ic_charset) {
2261 wakaba 1.32 $self->{onerror}->(node => $attr,
2262 wakaba 1.104 type => 'mismatched charset name',
2263 wakaba 1.106 text => $ic,
2264 wakaba 1.104 value => $charset_value,
2265     level => $self->{level}->{must});
2266 wakaba 1.21 }
2267     } else {
2268     ## NOTE: MUST, but not checkable, since the document is not originally
2269     ## in serialized form (or the parser does not preserve the input
2270     ## encoding information).
2271 wakaba 1.32 $self->{onerror}->(node => $attr,
2272 wakaba 1.104 type => 'mismatched charset name not checked',
2273     value => $charset_value,
2274     level => $self->{level}->{uncertain});
2275 wakaba 1.21 }
2276    
2277 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
2278     $self->{onerror}->(node => $attr,
2279 wakaba 1.104 type => 'charref in charset',
2280     level => $self->{level}->{must},
2281     layer => 'syntax');
2282 wakaba 1.22 }
2283 wakaba 1.32 }; # $check_charset
2284    
2285     ## TODO: metadata conformance
2286    
2287     ## TODO: pragma conformance
2288     if (defined $http_equiv_attr) { ## An enumerated attribute
2289     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
2290 wakaba 1.33
2291 wakaba 1.85 if ($self->{has_http_equiv}->{$keyword}) {
2292     $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2293     node => $http_equiv_attr,
2294 wakaba 1.104 level => $self->{level}->{must});
2295 wakaba 1.85 } else {
2296     $self->{has_http_equiv}->{$keyword} = 1;
2297     }
2298    
2299     if ($keyword eq 'content-type') {
2300 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2301 wakaba 1.33
2302 wakaba 1.32 $check_charset_decl->();
2303     if ($content_attr) {
2304     my $content = $content_attr->value;
2305 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2306 wakaba 1.132 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2307 wakaba 1.58 =(.+)\z!sx) {
2308 wakaba 1.32 $check_charset->($content_attr, $1);
2309     } else {
2310     $self->{onerror}->(node => $content_attr,
2311     type => 'meta content-type syntax error',
2312 wakaba 1.104 level => $self->{level}->{must});
2313 wakaba 1.85 }
2314     }
2315     } elsif ($keyword eq 'default-style') {
2316     ## ISSUE: Not defined yet in the spec.
2317     } elsif ($keyword eq 'refresh') {
2318     if ($content_attr) {
2319     my $content = $content_attr->value;
2320     if ($content =~ /\A[0-9]+\z/) {
2321     ## NOTE: Valid non-negative integer.
2322     #
2323 wakaba 1.132 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2324 wakaba 1.85 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
2325     Whatpm::URIChecker->check_iri_reference ($content, sub {
2326 wakaba 1.104 $self->{onerror}->(value => $content, @_, node => $content_attr);
2327 wakaba 1.106 }, $self->{level});
2328 wakaba 1.85 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URIs".
2329    
2330     $element_state->{uri_info}->{content}->{node} = $content_attr;
2331     $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2332     ## TODO: absolute
2333     push @{$self->{return}->{uri}->{$content} ||= []},
2334     $element_state->{uri_info}->{content};
2335     } else {
2336     $self->{onerror}->(node => $content_attr,
2337     type => 'refresh:syntax error',
2338 wakaba 1.104 level => $self->{level}->{must});
2339 wakaba 1.32 }
2340     }
2341     } else {
2342     $self->{onerror}->(node => $http_equiv_attr,
2343 wakaba 1.104 type => 'enumerated:invalid',
2344     level => $self->{level}->{must});
2345 wakaba 1.32 }
2346     }
2347    
2348     if (defined $charset_attr) {
2349     $check_charset_decl->();
2350     $check_charset->($charset_attr, $charset_attr->value);
2351 wakaba 1.1 }
2352     },
2353     };
2354    
## Checker definition for the HTML |style| element.
##
## check_start normalizes the |type| attribute (defaulting to |text/css|)
## into a lowercase "type/subtype" string, or undef if it cannot be parsed.
## That value decides whether child elements are tolerated and, in
## check_end, how the accumulated text content is reported.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
      media => $HTMLMQAttrChecker,
      scoped => $GetHTMLBooleanAttrChecker->('scoped'),
      ## NOTE: |title| has special semantics for |style|s, but is
      ## syntactically not different
    },
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      disabled => FEATURE_XHTML2_ED,
      href => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
      id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      scoped => FEATURE_HTML5_FD,
      title => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    },
  ),

  ## Determine the style language and initialize per-element state.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $media_type = $item->{node}->get_attribute_ns (undef, 'type');
    $media_type = 'text/css' unless defined $media_type;
    if ($media_type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      $media_type = join '/', $1, $2;
      $media_type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
    } else {
      ## NOTE: We don't know how parameters are handled by UAs.  According
      ## to HTML5 specification, <style> with unknown parameters in
      ## |type=""| must be ignored.
      undef $media_type;
    }

    ## Child elements are rejected only for |text/css|; an invalid
    ## |type=""| (undef) and any other (unknown) language allow them.
    ## NOTE: There is no definition for "XML-based styling language" in
    ## HTML5, so "+xml" types get no special treatment here.
    $element_state->{allow_element}
        = (defined $media_type and $media_type eq 'text/css') ? 0 : 1;
    $element_state->{style_type} = $media_type;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);

    ## Buffer filled by check_child_text and consumed by check_end.
    $element_state->{text} = '';
  },

  ## Report child elements that are not permitted inside this |style|.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
                  $element_state->{allow_element})) {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## Accumulate the character data of the element.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $child_node->data;
  },

  ## Hand the collected text to |onsubdoc|, or report an XML-based type.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    my $style_type = $element_state->{style_type};
    if (not defined $style_type) {
      ## NOTE: Invalid type=""
      #
    } elsif ($style_type ne 'text/css' and
             $style_type =~ m![+/][Xx][Mm][Ll]\z!) {
      ## NOTE: XML content should be checked by THIS instance of checker
      ## as part of normal tree validation.  However, we don't know of any
      ## XML-based styling language that can be used in HTML <style>
      ## element, such that we throw a "style language not supported" error.
      $self->{onerror}->(node => $item->{node},
                         type => 'XML style lang',
                         text => $style_type,
                         level => $self->{level}->{uncertain});
    } else {
      ## Covers |text/css| as well as any other non-XML media type.
      ## NOTE: Should we raise some kind of error for,
      ## say, <style type="text/plaion">?
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => $style_type,
                           is_char_string => 1});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
2460 wakaba 1.25 ## ISSUE: Relationship to significant content check?
2461 wakaba 1.1
## Checker definition for the HTML |body| element.  Besides the flow
## content checks it validates the legacy color/background attributes and
## marks |background| as an embedded-resource URI.
$Element->{$HTML_NS}->{body} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## All color-valued presentational attributes share one checker.
      (map { ($_ => $HTMLColorAttrChecker) }
           qw(alink bgcolor link text vlink)),
      background => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
           qw(alink background bgcolor link text vlink)),
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{background}->{type}->{embedded} = 1;
    $uri_info->{$_}->{type}->{resource} = 1 for qw(template ref);
  },
};
2493    
## Checker definition for the HTML |section| element: flow content with
## no element-specific attribute checkers, only the common status tables.
$Element->{$HTML_NS}->{section} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
    },
  ),
};
2503    
## Checker definition for the HTML |nav| element.
## NOTE: |status| is listed before the flow content checker entries, so
## any |status| key in %HTMLFlowContentChecker would take precedence.
$Element->{$HTML_NS}->{nav} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
};
2508    
## Checker definition for the HTML |article| element: flow content plus a
## |pubdate| attribute checked as a global date and time string.
$Element->{$HTML_NS}->{article} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      pubdate => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      # XXX cite
      pubdate => FEATURE_HTML5_LC,
    },
  ),
}; # article
2520 wakaba 1.1
## Checker definition for the HTML |blockquote| element: flow content
## whose |cite| attribute is checked as a URI and recorded as a citation.
$Element->{$HTML_NS}->{blockquote} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLFlowContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      cite => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{cite}->{type}->{cite} = 1;
    $uri_info->{$_}->{type}->{resource} = 1 for qw(template ref);
  },
};
2542    
## Checker definition for the HTML |aside| element.
## NOTE: |status| is listed before the flow content checker entries, so
## any |status| key in %HTMLFlowContentChecker would take precedence.
$Element->{$HTML_NS}->{aside} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
};
2547    
## Checker definition for the HTML |h1| element.  This hash is also used
## as the template copied for |h2|..|h6|.  Starting a heading sets the
## |has_hn| flag on the checker.
$Element->{$HTML_NS}->{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        left    => 1,
        center  => 1,
        right   => 1,
        justify => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record that a heading element has been seen.
    $self->{flag}->{has_hn} = 1;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);
  },
};
2570    
## |h2|..|h6| each get their own shallow copy of the |h1| definition.
for my $heading_name (qw(h2 h3 h4 h5 h6)) {
  $Element->{$HTML_NS}->{$heading_name} = {%{$Element->{$HTML_NS}->{h1}}};
}
2580 wakaba 1.1
2581 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
2582 wakaba 1.174
2583     # XXX footer in header is disallowed (HTML5 revision 3050)
2584 wakaba 1.29
## Checker definition for the HTML |header| element.  While the element
## is open, |header|, |footer| and sectioning content are added to the
## minus (disallowed) elements, and the |has_hn| flag is reset so that
## check_end can tell whether a heading was seen inside it.
$Element->{$HTML_NS}->{header} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {qw(header 1 footer 1)}},
                                $HTMLSectioningContent);

    ## Save the outer flag value; it is merged back in check_end.
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    if (not $self->{flag}->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn',
                         level => $self->{level}->{must});
    }

    ## The flag stays true if it was true before this element started.
    $self->{flag}->{has_hn} = $element_state->{has_hn_original}
        unless $self->{flag}->{has_hn};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
2613    
## Checker definition for the HTML |footer| element.  While the element
## is open, |footer|, sectioning content and heading content are added to
## the minus (disallowed) elements; they are removed again in check_end.
$Element->{$HTML_NS}->{footer} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $disallowed_self = {$HTML_NS => {footer => 1}};
    $self->_add_minus_elements ($element_state,
                                $disallowed_self,
                                $HTMLSectioningContent,
                                $HTMLHeadingContent);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2634    
## Checker definition for the HTML |address| element.  While the element
## is open, |footer|, |address|, sectioning content and heading content
## are added to the minus (disallowed) elements.
$Element->{$HTML_NS}->{address} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: add test
      #align => $GetHTMLEnumeratedAttrChecker->({
      #  left => 1, center => 1, right => 1, justify => 1,
      #}),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $disallowed_here = {$HTML_NS => {footer => 1, address => 1}};
    $self->_add_minus_elements ($element_state,
                                $disallowed_here,
                                $HTMLSectioningContent,
                                $HTMLHeadingContent);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2667    
## Checker definition for the HTML |p| element: phrasing content with an
## enumerated (deprecated) |align| attribute.
$Element->{$HTML_NS}->{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        left    => 1,
        center  => 1,
        right   => 1,
        justify => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2683    
## Checker definition for the HTML |hr| element: an empty element.  The
## legacy presentational attributes only have status entries so far; no
## value checkers are registered for them.
$Element->{$HTML_NS}->{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |align|, |noshade|, |size|, |width|
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      noshade => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      size => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
2700    
## Checker definition for the HTML |br| element: an empty element with an
## enumerated (deprecated) |clear| attribute.
$Element->{$HTML_NS}->{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      clear => $GetHTMLEnumeratedAttrChecker->({
        left  => 1,
        all   => 1,
        right => 1,
        none  => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      clear => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    },
  ),
  ## NOTE: Blank line MUST NOT be used for presentation purpose.
  ## (This requirement is semantic so that we cannot check.)
};
2720    
## Checker definition for the HTML |dialog| element.
##
## Children must strictly alternate |dt|, |dd|, |dt|, |dd|, ...  The
## |phase| entry of the element state names the child expected next:
## 'before dt' or 'before dd'.
$Element->{$HTML_NS}->{dialog} = {
  status => FEATURE_HTML5_WD,
  %HTMLChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      ## phase => [element expected now, its counterpart]
      my %names_for = (
        'before dt' => ['dt', 'dd'],
        'before dd' => ['dd', 'dt'],
      );
      my $names = $names_for{$element_state->{phase}}
          or die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
      my ($expected, $counterpart) = @$names;

      ## Only HTML-namespace children can be |dt| or |dd|.
      my $html_ln = $child_nsuri eq $HTML_NS ? $child_ln : '';
      if ($html_ln eq $expected) {
        $element_state->{phase} = 'before ' . $counterpart;
      } elsif ($html_ln eq $counterpart) {
        ## The expected element was skipped; report it and keep waiting
        ## for the same kind of element.
        $self->{onerror}
            ->(node => $child_el, type => 'ps element missing',
               text => $expected,
               level => $self->{level}->{must});
        $element_state->{phase} = 'before ' . $expected;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |dt| without its |dd| leaves the phase at 'before dd'.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'dd',
                       level => $self->{level}->{must})
        if $element_state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  },
};
2790    
## Checker definition for the HTML |pre| element: phrasing content with a
## (deprecated) non-negative integer |width| attribute.
##
## In check_end, a |pre| whose |class| attribute contains the token |idl|
## or |idl-code| has its text content passed to |onsubdoc| as
## |text/x-webidl|.
$Element->{$HTML_NS}->{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## TODO: Flag to enable/disable IDL checking?
    my $class = $item->{node}->get_attribute_ns (undef, 'class');
    ## The attribute may be absent; treat that as an empty token list
    ## instead of matching against undef.
    $class = '' unless defined $class;

    ## Compare whole space-separated tokens.  A |\b|-anchored pattern
    ## would also fire on unrelated classes such as "non-idl" or
    ## "idl-foo", since "-" is a word boundary.
    ## TODO: use classList.has
    my $has_idl_class = grep { $_ eq 'idl' or $_ eq 'idl-code' }
        split /[\x09\x0A\x0C\x0D\x20]+/, $class;
    if ($has_idl_class) {
      ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
      ## NOTE: pre.code > code.idl-code: WebIDL spec
      ## NOTE: pre.idl-code: DOM1 spec
      ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
      ## NOTE: pre.schema: ReSpec-generated specs
      $self->{onsubdoc}->({s => $item->{node}->text_content,
                           container_node => $item->{node},
                           media_type => 'text/x-webidl',
                           is_char_string => 1});
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2823    
## Checker definition for the HTML |ol| element.  Apart from elements in
## the checker's plus-elements set, only HTML |li| children are accepted,
## and significant text is rejected.  This hash is also the base that the
## |ul| definition copies.
$Element->{$HTML_NS}->{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
      reversed => $GetHTMLBooleanAttrChecker->('reversed'),
      start => $HTMLIntegerAttrChecker,
      ## TODO: HTML4 |type|
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      reversed => FEATURE_HTML5_WD,
      sdaform => FEATURE_HTML20_RFC,
      #start => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
      start => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
                  ($child_nsuri eq $HTML_NS and $child_ln eq 'li'))) {
      ## Neither a plus element nor an HTML |li|.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
2869    
## Checker definition for the HTML |ul| element: a copy of the |ol|
## definition with its own |status| and attribute tables.
$Element->{$HTML_NS}->{ul} = {
  %{$Element->{$HTML_NS}->{ol}},
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
      ## TODO: HTML4 |type|
      ## TODO: sdaform, align
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      type => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
2887    
## Checker definition for the deprecated HTML |dir| element: a copy of
## the |ul| definition with its own |status| and attribute tables.
$Element->{$HTML_NS}->{dir} = {
  ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
  %{$Element->{$HTML_NS}->{ul}},
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
};
2904    
## Checker definition for the HTML |li| element.
##
## The |value| attribute is reported when the parent element is not an
## HTML |ol|, and is then checked as an integer.  Child content is checked
## as phrasing content when the checker's |in_menu| flag is set, and as
## flow content otherwise.
$Element->{$HTML_NS}->{li} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |type|
      value => sub {
        my ($self, $attr) = @_;

        my $inside_ol = 0;
        my $parent_el = $attr->owner_element->manakai_parent_element;
        if (defined $parent_el) {
          my $ns = $parent_el->namespace_uri;
          my $ln = $parent_el->manakai_local_name;
          ## A null namespace can never equal the HTML namespace.
          $inside_ol = 1
              if defined $ns and $ns eq $HTML_NS and $ln eq 'ol';
        }

        if (not $inside_ol) {
          ## ISSUE: No "MUST" in the spec.
          $self->{onerror}->(node => $attr,
                             type => 'non-ol li value',
                             level => $self->{level}->{html5_fact});
        }

        $HTMLIntegerAttrChecker->($self, $attr);
      },
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      type => FEATURE_M12N10_REC_DEPRECATED,
      #value => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
      #    FEATURE_M12N10_REC_DEPRECATED,
      value => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
          FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## TODO: In <dir> element, then ...
    my $content_checker = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker
        : \%HTMLFlowContentChecker;
    $content_checker->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## TODO: In <dir> element, then ...
    my $content_checker = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker
        : \%HTMLFlowContentChecker;
    $content_checker->{check_child_text}->(@_);
  },
};
2963    
## |dl|: children are tracked with a three-state phase ('before dt',
## 'in dts', 'in dds'); only |dt| and |dd| children in the HTML namespace
## are accepted, and significant text is rejected.
{
  my %attr_checker = (
    compact => $GetHTMLBooleanAttrChecker->('compact'),
  );
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  );

  $Element->{$HTML_NS}{dl} = {
    %HTMLChecker,
    status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($self, $item, $element_state) = @_;
      $element_state->{phase} = 'before dt';

      $element_state->{uri_info}->{$_}->{type}->{resource} = 1
          for qw/template ref/;
    },
    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
          $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{level}->{must});
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        ## Listed in |plus_elements|: accepted without a phase change.
      } else {
        my $phase = $element_state->{phase};
        die "check_child_element: Bad |dl| phase: $element_state->{phase}"
            unless grep { $phase eq $_ } 'in dds', 'in dts', 'before dt';

        ## Local name of the child if it is an HTML element, '' otherwise.
        my $html_child = $child_nsuri eq $HTML_NS ? $child_ln : '';
        if ($html_child eq 'dt') {
          $element_state->{phase} = 'in dts';
        } elsif ($html_child eq 'dd') {
          ## A |dd| is only an error when no |dt| has been seen yet.
          if ($phase eq 'before dt') {
            $self->{onerror}->(node => $child_el,
                               type => 'ps element missing',
                               text => 'dt',
                               level => $self->{level}->{must});
          }
          $element_state->{phase} = 'in dds';
        } else {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed',
                             level => $self->{level}->{must});
        }
      }
    },
    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must})
          if $has_significant;
    },
    check_end => sub {
      my ($self, $item, $element_state) = @_;
      ## Ending while still in a run of |dt|s means the last group has no |dd|.
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'dd',
                         level => $self->{level}->{must})
          if $element_state->{phase} eq 'in dts';

      $HTMLChecker{check_end}->(@_);
    },
  };
}
3049    
## |dt|: built on %HTMLPhrasingContentChecker; no element-specific
## attribute checkers, only status information.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  $Element->{$HTML_NS}{dt} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3060    
## |dd|: built on %HTMLFlowContentChecker; no element-specific attribute
## checkers, only status information.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  $Element->{$HTML_NS}{dd} = {
    %HTMLFlowContentChecker,
    status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3071    
## |a|: built on %HTMLTransparentChecker.  Attribute checking is done by
## hand (rather than through $GetHTMLAttrsChecker) because several
## attributes are only allowed when |href| is present.
$Element->{$HTML_NS}->{a} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Element-specific tables for null-namespace attributes.  Neither
    ## depends on the attribute being inspected, so they are built
    ## lazily, at most once per invocation, instead of once per attribute.
    my $local_status;
    my $local_checker;

    ## Null-namespace attributes that have an element-specific checker,
    ## keyed by local name; consulted after the loop.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $local_status ||= {
          %HTMLAttrStatus,
          %HTMLM12NXHTML2CommonAttrStatus,
          accesskey => FEATURE_M12N10_REC,
          charset => FEATURE_M12N10_REC,
          coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
          cryptopts => FEATURE_RFC2659,
          dn => FEATURE_RFC2659,
          href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
              FEATURE_M12N10_REC,
          hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
              FEATURE_M12N10_REC,
          lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
          media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
          methods => FEATURE_HTML20_RFC,
          name => FEATURE_M12N10_REC_DEPRECATED,
          nonce => FEATURE_RFC2659,
          onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          ping => FEATURE_HTML5_WD,
          rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
              FEATURE_M12N10_REC,
          rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
          sdapref => FEATURE_HTML20_RFC,
          shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
          tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
          type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
          urn => FEATURE_HTML20_RFC,
        };
        $local_checker ||= {
          accesskey => $HTMLAccesskeyAttrChecker,
          charset => sub {
            my ($self, $attr) = @_;
            $HTMLCharsetChecker->($attr->value, @_);
          },
          ## TODO: HTML4 |coords|
          target => $HTMLTargetAttrChecker,
          href => $HTMLURIAttrChecker,
          ping => $HTMLSpaceURIsAttrChecker,
          rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
          ## NOTE(review): assumes the generator below only returns a
          ## checker and has no other effect, so calling it once instead
          ## of once per attribute is equivalent - confirm.
          rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
          ## TODO: HTML4 |shape|
          media => $HTMLMQAttrChecker,
          ## TODO: HTML4/XHTML1 |name|
          hreflang => $HTMLLanguageTagAttrChecker,
          type => $HTMLIMTAttrChecker,
        };

        $status = $local_status->{$attr_ln};
        $checker = $local_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      ## Fall back to the generic per-namespace tables.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        ## A true but non-reference checker means "known, nothing to run".
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## Nothing is reported here for this case.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    ## Remember the outer |in_a_href| flag so |check_end| can restore it.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## Without |href|, these attributes are not allowed.
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    ## Clear the flag only if it was not already set by an ancestor.
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLTransparentChecker{check_end}->(@_);
  },
};
3195    
## |q|: |status| is listed before %HTMLPhrasingContentChecker on purpose
## (original key order is kept so any override behaviour is unchanged).
{
  my %attr_checker = (
    cite => $HTMLURIAttrChecker,
  );
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    cite => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdapref => FEATURE_HTML2X_RFC,
    sdasuff => FEATURE_HTML2X_RFC,
  );
  $Element->{$HTML_NS}{q} = {
    status => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    %HTMLPhrasingContentChecker,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($self, $item, $element_state) = @_;

      ## [attribute name, URI type flag] pairs recorded in |uri_info|.
      for my $pair (['cite', 'cite'],
                    ['template', 'resource'],
                    ['ref', 'resource']) {
        my ($attr_name, $uri_type) = @$pair;
        $element_state->{uri_info}->{$attr_name}->{type}->{$uri_type} = 1;
      }
    },
  };
}
3217 wakaba 1.75 ## TODO: "Quotation punctuation (such as quotation marks), if any, must be
3218     ## placed inside the <code>q</code> element." Though we cannot test the
3219     ## element against this requirement since it includes a semantic bit,
3220     ## it might be possible to inform of the existence of quotation marks OUTSIDE
3221     ## the |q| element.
3222 wakaba 1.1
## |cite|: built on %HTMLPhrasingContentChecker; status information only.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  $Element->{$HTML_NS}{cite} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3233    
## |em|: built on %HTMLPhrasingContentChecker; status information only.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  $Element->{$HTML_NS}{em} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3244    
## |strong|: built on %HTMLPhrasingContentChecker; status information only.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  $Element->{$HTML_NS}{strong} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3255    
## |small|: built on %HTMLPhrasingContentChecker; status information only.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  );
  $Element->{$HTML_NS}{small} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3265    
## |big|: built on %HTMLPhrasingContentChecker; its status carries no
## HTML5 feature flag.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  );
  $Element->{$HTML_NS}{big} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3275    
## |mark|: %HTMLPhrasingContentChecker with only a |status| entry placed
## in front of it (so the checker's own keys, if any collide, still win).
$Element->{$HTML_NS}{mark} = +{
  (status => FEATURE_HTML5_DEFAULT),
  %HTMLPhrasingContentChecker,
};
3280    
## |dfn|: built on %HTMLPhrasingContentChecker.  Nested |dfn| elements are
## excluded while inside, and the defined term is recorded in
## |$self->{term}| (term => array reference of defining nodes).
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    ## Determine the defined term:
    ##   1. the |title| attribute of the |dfn|, if present; otherwise
    ##   2. the |title| of the child |abbr|, provided that HTML |abbr| is
    ##      the ONLY element child and there is no text child other than
    ##      inter-element whitespace; otherwise
    ##   3. the text content of the |dfn|.
    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      ## Counting element children enforces "exactly one element child"
      ## even when the earlier children did not yield a term (e.g. a
      ## |span| followed by a titled |abbr|).
      my $element_children = 0;
      for my $child (@{$node->child_nodes}) {
        my $type = $child->node_type;
        if ($type == 1) { # ELEMENT_NODE
          if (++$element_children > 1) {
            undef $term;
            last;
          }
          next unless $child->manakai_local_name eq 'abbr';
          my $nsuri = $child->namespace_uri;
          next unless defined $nsuri and $nsuri eq $HTML_NS;
          my $attr = $child->get_attribute_node_ns (undef, 'title');
          $term = $attr->value if $attr;
        } elsif ($type == 3 or $type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next if $child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/; # Inter-element whitespace
          undef $term;
          last;
        }
      }
      $term = $node->text_content unless defined $term;
    }
    push @{$self->{term}->{$term} ||= []}, $node;
    ## ISSUE: The HTML5 definition for the defined term does not work with
    ## |ruby| unless |dfn| has |title|.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3341    
## |abbr|: built on %HTMLPhrasingContentChecker; status information only.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    full => FEATURE_XHTML2_ED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  );
  $Element->{$HTML_NS}{abbr} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
    ## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
    ## number (plural vs singular) must match the grammatical number of the
    ## contents of the element."  Checking this by machine would need
    ## language-specific knowledge and a dictionary, so this requirement
    ## is not checked.
    ## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
  };
}
3358    
## |acronym|: built on %HTMLPhrasingContentChecker; its status carries no
## HTML5 feature flag.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  );
  $Element->{$HTML_NS}{acronym} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3368    
## |time|: the date/time string is taken from the |datetime| attribute
## when present, otherwise from the element's text content, and is
## validated when the element ends.
$Element->{$HTML_NS}{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # value is validated in |check_end|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_FD,
  }),
  ## TODO: Update definition
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Pick the string to validate, the node errors are reported against,
    ## and the whitespace pattern allowed around the components.
    my $el = $item->{node};
    my $datetime_attr = $el->get_attribute_node_ns (undef, 'datetime');
    my ($input, $input_node, $reg_sp) = $datetime_attr
        ? ($datetime_attr->value, $datetime_attr,
           qr/[\x09\x0A\x0C\x0D\x20]*/)
        : ($el->text_content, $el, qr/\p{WhiteSpace}*/);
    ## ISSUE: What is the definition for "successfully extracts a date
    ## or time"?  If the algorithm says the string is invalid but
    ## returns some date or time, is it "successfully"?

    ## Every error raised here has the same node and level.
    my $report = sub {
      $self->{onerror}->(node => $input_node,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    ## In list context the match returns all ten capture groups (unmatched
    ## ones as undef), or the empty list on failure.  |$first| is the year
    ## in the date-time form and the hour in the time-only form.
    if (my ($first, $month, $day, $dt_hour, $dt_minute, $dt_second,
            $tz_hour, $tz_minute, $t_minute, $t_second) = $input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      my ($hour, $minute, $second);
      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        $report->('dateortime:syntax error')
            if length ($first) != 4 or length ($month) != 2 or
               length ($day) != 2 or length ($dt_hour) != 2 or
               length ($dt_minute) != 2;

        if (1 <= $month and $month <= 12) {
          ## Index 0 is a placeholder; February is given 29 days here and
          ## the leap-year rule is applied separately below.
          my @max_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
          $report->('datetime:bad day')
              if $day < 1 or $day > $max_day[$month];
          my $is_leap_year = ($first % 400 == 0 or
                              ($first % 4 == 0 and $first % 100 != 0));
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and not $is_leap_year;
        } else {
          $report->('datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          $report->('dateortime:syntax error')
              if length ($tz_hour) != 2 or length ($tz_minute) != 2;
          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        $report->('dateortime:syntax error')
            if length ($first) != 2 or length ($t_minute) != 2;

        ($hour, $minute, $second) = ($first, $t_minute, $t_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: The integer part of the seconds is not required to be
        ## two digits long, but it must not be missing.
        $report->('dateortime:syntax error')
            if substr ($second, 0, 1) eq '.';
        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3517    
## |meter|: all six numeric attributes share the same kind of checker (a
## floating-point number with an always-true range predicate) and the
## same status.
## TODO: "The recommended way of giving the value is to include it as contents of the element"
## TODO: value inequalities (HTML5 revision 1463)
## TODO: content checking
## TODO: content or value must contain number (rev 2053)
{
  my %attr_checker = map {;
    $_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 })
  } qw/value min low high max optimum/;
  my %attr_status = (
    %HTMLAttrStatus,
    (map {; $_ => FEATURE_HTML5_DEFAULT } qw/high low max min optimum value/),
  );
  $Element->{$HTML_NS}{meter} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLPhrasingContentChecker,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3541    
## |progress|: |value| is checked with a ">= 0" predicate and |max| with
## a "> 0" predicate.
## TODO: recommended to use content
{
  my %attr_checker = (
    value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
    max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
  );
  my %attr_status = (
    %HTMLAttrStatus,
    max => FEATURE_HTML5_DEFAULT,
    value => FEATURE_HTML5_DEFAULT,
  );
  $Element->{$HTML_NS}{progress} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLPhrasingContentChecker,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3554    
## |code|: built on %HTMLPhrasingContentChecker; status information only.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  $Element->{$HTML_NS}{code} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3565    
## |var|: built on %HTMLPhrasingContentChecker; status information only.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  $Element->{$HTML_NS}{var} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3576    
## |samp|: built on %HTMLPhrasingContentChecker; status information only.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  $Element->{$HTML_NS}{samp} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3587    
## |kbd|: built on %HTMLPhrasingContentChecker; status information only.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  $Element->{$HTML_NS}{kbd} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3598    
## |sub| and |sup| share one and the same definition hash.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdapref => FEATURE_HTML2X_RFC,
  );
  my $definition = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
  $Element->{$HTML_NS}{$_} = $definition for qw/sub sup/;
}
3611 wakaba 1.1
## |span|: built on %HTMLPhrasingContentChecker; status information only.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map {; $_ => FEATURE_HTML4_REC_RESERVED }
         qw/datafld dataformatas datasrc/),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML2X_RFC,
  );
  $Element->{$HTML_NS}{span} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3625    
## |i| and |b| share one and the same definition hash.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  my $definition = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
  $Element->{$HTML_NS}{$_} = $definition for qw/i b/;
}
3638    
## |tt|: built on %HTMLPhrasingContentChecker; its status carries no
## HTML5 feature flag.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );
  $Element->{$HTML_NS}{tt} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3649 wakaba 1.51
## |s|, |strike| and |u| share one and the same definition hash.
{
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  );
  my $definition = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
  $Element->{$HTML_NS}{$_} = $definition for qw/s strike u/;
}
3663    
## |bdo|: runs the generic attribute checks, then additionally requires
## the |dir| attribute to be present.
$Element->{$HTML_NS}{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## The status table and the generic checker are rebuilt on every call.
    my %attr_status = (
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    );
    my $generic_check = $GetHTMLAttrsChecker->({}, \%attr_status);
    $generic_check->($self, $item, $element_state);

    $item->{node}->has_attribute_ns (undef, 'dir')
        or $self->{onerror}->(node => $item->{node},
                              type => 'attribute missing',
                              text => 'dir',
                              level => $self->{level}->{must});
  },
  ## ISSUE: The spec does not directly say that |dir| is an enumerated attr.
};
3689    
3690 wakaba 1.99 $Element->{$HTML_NS}->{ruby} = {
3691     %HTMLPhrasingContentChecker,
3692     status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
3693     check_attrs => $GetHTMLAttrsChecker->({}, {
3694     %HTMLAttrStatus,
3695     %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
3696 wakaba 1.153 lang => FEATURE_HTML5_WD,
3697 wakaba 1.99 }),
3698     check_start => sub {
3699     my ($self, $item, $element_state) = @_;
3700    
3701     $element_state->{phase} = 'before-rb';
3702     #$element_state->{has_sig}
3703 wakaba 1.100
3704     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3705     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3706 wakaba 1.99 },
3707     ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
3708     check_child_element => sub {
3709     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3710     $child_is_transparent, $element_state) = @_;
3711 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
3712     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
3713 wakaba 1.99 $self->{onerror}->(node => $child_el,
3714     type => 'element not allowed:minus',
3715 wakaba 1.104 level => $self->{level}->{must});
3716 wakaba 1.99 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3717     #
3718     } elsif ($element_state->{phase} eq 'before-rb') {
3719     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3720     $element_state->{phase} = 'in-rb';
3721     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3722     $self->{onerror}->(node => $child_el,
3723 wakaba 1.104 level => $self->{level}->{should},
3724     type => 'no significant content before');
3725 wakaba 1.99 $element_state->{phase} = 'after-rt';
3726     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3727     $self->{onerror}->(node => $child_el,
3728 wakaba 1.104 level => $self->{level}->{should},
3729     type => 'no significant content before');
3730 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3731     } else {
3732     $self->{onerror}->(node => $child_el,
3733 wakaba 1.104 type => 'element not allowed:ruby base',
3734     level => $self->{level}->{must});
3735 wakaba 1.99 $element_state->{phase} = 'in-rb';
3736     }
3737     } elsif ($element_state->{phase} eq 'in-rb') {
3738     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3739     #$element_state->{phase} = 'in-rb';
3740     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3741     unless ($element_state->{has_significant}) {
3742     $self->{onerror}->(node => $child_el,
3743 wakaba 1.104 level => $self->{level}->{should},
3744     type => 'no significant content before');
3745 wakaba 1.99 }
3746     $element_state->{phase} = 'after-rt';
3747     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3748     unless ($element_state->{has_significant}) {
3749     $self->{onerror}->(node => $child_el,
3750 wakaba 1.104 level => $self->{level}->{should},
3751     type => 'no significant content before');
3752 wakaba 1.99 }
3753     $element_state->{phase} = 'after-rp1';
3754     } else {
3755     $self->{onerror}->(node => $child_el,
3756 wakaba 1.104 type => 'element not allowed:ruby base',
3757     level => $self->{level}->{must});
3758 wakaba 1.99 #$element_state->{phase} = 'in-rb';
3759     }
3760     } elsif ($element_state->{phase} eq 'after-rt') {
3761     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3762     if ($element_state->{has_significant}) {
3763     $element_state->{has_sig} = 1;
3764     delete $element_state->{has_significant};
3765     }
3766     $element_state->{phase} = 'in-rb';
3767     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3768     $self->{onerror}->(node => $child_el,
3769 wakaba 1.104 level => $self->{level}->{should},
3770     type => 'no significant content before');
3771 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3772     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3773     $self->{onerror}->(node => $child_el,
3774 wakaba 1.104 level => $self->{level}->{should},
3775     type => 'no significant content before');
3776 wakaba 1.99 #$element_state->{phase} = 'after-rt';
3777     } else {
3778     $self->{onerror}->(node => $child_el,
3779 wakaba 1.104 type => 'element not allowed:ruby base',
3780     level => $self->{level}->{must});
3781 wakaba 1.99 if ($element_state->{has_significant}) {
3782     $element_state->{has_sig} = 1;
3783     delete $element_state->{has_significant};
3784     }
3785     $element_state->{phase} = 'in-rb';
3786     }
3787     } elsif ($element_state->{phase} eq 'after-rp1') {
3788     if ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3789     $element_state->{phase} = 'after-rp-rt';
3790     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3791     $self->{onerror}->(node => $child_el,
3792 wakaba 1.104 type => 'ps element missing',
3793     text => 'rt',
3794     level => $self->{level}->{must});
3795 wakaba 1.99 $element_state->{phase} = 'after-rp2';
3796     } else {
3797     $self->{onerror}->(node => $child_el,
3798 wakaba 1.104 type => 'ps element missing',
3799     text => 'rt',
3800     level => $self->{level}->{must});
3801 wakaba 1.99 $self->{onerror}->(node => $child_el,
3802 wakaba 1.104 type => 'ps element missing',
3803     text => 'rp',
3804     level => $self->{level}->{must});
3805 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3806     $self->{onerror}->(node => $child_el,
3807 wakaba 1.104 type => 'element not allowed:ruby base',
3808     level => $self->{level}->{must});
3809 wakaba 1.99 }
3810     if ($element_state->{has_significant}) {
3811     $element_state->{has_sig} = 1;
3812     delete $element_state->{has_significant};
3813     }
3814     $element_state->{phase} = 'in-rb';
3815     }
3816     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3817     if ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3818     $element_state->{phase} = 'after-rp2';
3819     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3820     $self->{onerror}->(node => $child_el,
3821 wakaba 1.104 type => 'ps element missing',
3822     text => 'rp',
3823     level => $self->{level}->{must});
3824 wakaba 1.99 $self->{onerror}->(node => $child_el,
3825 wakaba 1.104 level => $self->{level}->{should},
3826     type => 'no significant content before');
3827 wakaba 1.99 $element_state->{phase} = 'after-rt';
3828     } else {
3829     $self->{onerror}->(node => $child_el,
3830 wakaba 1.104 type => 'ps element missing',
3831     text => 'rp',
3832     level => $self->{level}->{must});
3833 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3834     $self->{onerror}->(node => $child_el,
3835 wakaba 1.104 type => 'element not allowed:ruby base',
3836     level => $self->{level}->{must});
3837 wakaba 1.99 }
3838     if ($element_state->{has_significant}) {
3839     $element_state->{has_sig} = 1;
3840     delete $element_state->{has_significant};
3841     }
3842     $element_state->{phase} = 'in-rb';
3843     }
3844     } elsif ($element_state->{phase} eq 'after-rp2') {
3845     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3846     if ($element_state->{has_significant}) {
3847     $element_state->{has_sig} = 1;
3848     delete $element_state->{has_significant};
3849     }
3850     $element_state->{phase} = 'in-rb';
3851     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3852     $self->{onerror}->(node => $child_el,
3853 wakaba 1.104 level => $self->{level}->{should},
3854     type => 'no significant content before');
3855 wakaba 1.99 $element_state->{phase} = 'after-rt';
3856     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3857     $self->{onerror}->(node => $child_el,
3858 wakaba 1.104 level => $self->{level}->{should},
3859     type => 'no significant content before');
3860 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3861     } else {
3862     $self->{onerror}->(node => $child_el,
3863 wakaba 1.104 type => 'element not allowed:ruby base',
3864     level => $self->{level}->{must});
3865 wakaba 1.99 if ($element_state->{has_significant}) {
3866     $element_state->{has_sig} = 1;
3867     delete $element_state->{has_significant};
3868     }
3869     $element_state->{phase} = 'in-rb';
3870     }
3871     } else {
3872     die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
3873     }
3874     },
check_child_text => sub {
  ## Advances the |ruby| phase for a text node child.  Only text that
  ## the caller flags as significant has any effect.
  my ($self, $item, $child_node, $has_significant, $element_state) = @_;
  if ($has_significant) {
    ## For each known phase, the local names of the elements that are
    ## reported as missing when significant text appears in that phase.
    my $skipped = {
      'before-rb' => [],
      'in-rb' => [],
      'after-rt' => [],
      'after-rp2' => [],
      'after-rp1' => ['rt', 'rp'],
      'after-rp-rt' => ['rp'],
    }->{$element_state->{phase}}
        or die "check_child_text: Bad |ruby| phase: $element_state->{phase}";
    for my $missing_ln (@$skipped) {
      $self->{onerror}->(node => $child_node,
                         type => 'ps element missing',
                         text => $missing_ln,
                         level => $self->{level}->{must});
    }
    ## Significant text always leaves the checker in the base phase.
    $element_state->{phase} = 'in-rb';
  }
},
check_end => sub {
  ## Final check of a |ruby| element.
  ##
  ##   $self          - the checker; |onerror| and |level| are read from it
  ##   $item          - the item being checked; errors are reported on
  ##                    |$item->{node}|
  ##   $element_state - per-element state holding the |phase| reached by
  ##                    the child checks
  ##
  ## Reports what is still missing for the phase the element ended in,
  ## then propagates the significance flag to the real parent's state.
  ## Dies if |phase| is not one of the known phase names.
  my ($self, $item, $element_state) = @_;
  $self->_remove_minus_elements ($element_state);

  if ($element_state->{phase} eq 'before-rb') {
    ## No child moved the phase forward: neither base content nor |rt|.
    $self->{onerror}->(node => $item->{node},
                       level => $self->{level}->{should},
                       type => 'no significant content');
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($element_state->{phase} eq 'in-rb') {
    ## Ended in a base that is not followed by any |rt|.
    unless ($element_state->{has_significant}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content at the end');
    }
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($element_state->{phase} eq 'after-rt' or
           $element_state->{phase} eq 'after-rp2') {
    ## Nothing is reported for these two end phases.
    #
  } elsif ($element_state->{phase} eq 'after-rp1') {
    ## Ended right after the first |rp|: both |rt| and the second |rp|
    ## are reported as missing.
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } elsif ($element_state->{phase} eq 'after-rp-rt') {
    ## Ended after |rp| |rt|: the second |rp| is reported as missing.
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } else {
    ## The message names this callback (it previously said
    ## |check_child_text|, which pointed at the wrong code path).
    die "check_end: Bad |ruby| phase: $element_state->{phase}";
  }

  ## NOTE: A modified version of |check_end| of %AnyChecker.
  ## |has_sig| is set by the child element check when it moves
  ## |has_significant| aside at the start of a new base.
  if ($element_state->{has_significant} or $element_state->{has_sig}) {
    $item->{real_parent_state}->{has_significant} = 1;
  }
},
3954     };
3955    
## Checker definition for the HTML |rt| element: the phrasing content
## checker with its own status and attribute status table.
$Element->{$HTML_NS}{rt} = {
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {},  # no element-specific attribute checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD,
    },
  ),
  status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
}; # rt
3965    
## Checker definition for the HTML |rp| element: the phrasing content
## checker with its own status and attribute status table.
$Element->{$HTML_NS}{rp} = {
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {},  # no element-specific attribute checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD,
    },
  ),
  status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
}; # rp
3975 wakaba 1.99
3976 wakaba 1.29 =pod
3977    
3978     ## TODO:
3979    
3980     +
3981     + <p>Partly because of the confusion described above, authors are
3982     + strongly recommended to always mark up all paragraphs with the
3983     + <code>p</code> element, and to not have any <code>ins</code> or
3984     + <code>del</code> elements that cross across any <span
3985     + title="paragraph">implied paragraphs</span>.</p>
3986     +
3987     (An informative note)
3988    
3989     <p><code>ins</code> elements should not cross <span
3990     + title="paragraph">implied paragraph</span> boundaries.</p>
3991     (normative)
3992    
3993     + <p><code>del</code> elements should not cross <span
3994     + title="paragraph">implied paragraph</span> boundaries.</p>
3995     (normative)
3996    
3997     =cut
3998    
## Checker definition for the HTML |ins| element: the transparent
## checker plus |cite| and |datetime| attribute checks.
$Element->{$HTML_NS}{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    ## Records, per attribute name, the URI type flags for this element.
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}{cite}{type}{cite} = 1;
    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },
}; # ins
4020    
## Checker definition for the HTML |del| element: the transparent
## checker plus |cite| and |datetime| attribute checks, with its own
## end-of-element check.
$Element->{$HTML_NS}{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: Significantness flag does not propagate.
    ## An error is reported only when the element has no significant
    ## content and the item is not marked as transparent.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
  check_start => sub {
    ## Records, per attribute name, the URI type flags for this element.
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}{cite}{type}{cite} = 1;
    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },
}; # del
4054    
## Checker definition for the HTML |figure| element.  It extends the flow
## content checker and tracks where |legend| children appear relative to
## other content, using these |$element_state| keys:
##   has_legend_at_first - a |legend| was seen before any other content
##   has_legend          - the |legend| element seen after other content
##   has_non_legend      - non-|legend| content seen since the last |legend|
4055 wakaba 1.35 $Element->{$HTML_NS}->{figure} = {
4056 wakaba 1.72 %HTMLFlowContentChecker,
4057 wakaba 1.153 status => FEATURE_HTML5_WD,
4058 wakaba 1.72 ## NOTE: legend, Flow | Flow, legend?
4059 wakaba 1.41 check_child_element => sub {
4060     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4061     $child_is_transparent, $element_state) = @_;
## An excluded ("minus") interactive child is reported and counts as
## non-legend content.
4062 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
4063     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
4064 wakaba 1.41 $self->{onerror}->(node => $child_el,
4065     type => 'element not allowed:minus',
4066 wakaba 1.104 level => $self->{level}->{must});
4067 wakaba 1.41 $element_state->{has_non_legend} = 1;
4068     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4069     #
4070     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
## A |legend| child.  If a leading legend exists, this one is reported.
## If an earlier trailing legend exists, the EARLIER one is reported and
## this one replaces it as the remembered legend.
4071     if ($element_state->{has_legend_at_first}) {
4072     $self->{onerror}->(node => $child_el,
4073     type => 'element not allowed:figure legend',
4074 wakaba 1.104 level => $self->{level}->{must});
4075 wakaba 1.41 } elsif ($element_state->{has_legend}) {
4076     $self->{onerror}->(node => $element_state->{has_legend},
4077     type => 'element not allowed:figure legend',
4078 wakaba 1.104 level => $self->{level}->{must});
4079 wakaba 1.41 $element_state->{has_legend} = $child_el;
4080     } elsif ($element_state->{has_non_legend}) {
4081     $element_state->{has_legend} = $child_el;
4082     } else {
4083     $element_state->{has_legend_at_first} = 1;
4084 wakaba 1.35 }
## Reset so that |check_end| can tell whether content follows this legend.
4085 wakaba 1.41 delete $element_state->{has_non_legend};
4086     } else {
## Any other child is checked as flow content; it counts as non-legend
## content unless the caller flagged it as transparent.
4087 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
4088 wakaba 1.43 $element_state->{has_non_legend} = 1 unless $child_is_transparent;
4089 wakaba 1.41 }
4090     },
4091     check_child_text => sub {
4092     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
## Significant text counts as non-legend content.
4093     if ($has_significant) {
4094     $element_state->{has_non_legend} = 1;
4095 wakaba 1.35 }
4096 wakaba 1.170
## NOTE(review): |in_figure| is set here for every text child, whether or
## not it is significant, and nowhere else in this definition - confirm
## that this callback is the intended place for it.
4097     $element_state->{in_figure} = 1;
4098 wakaba 1.41 },
4099     check_end => sub {
4100     my ($self, $item, $element_state) = @_;
4101 wakaba 1.35
## Without a leading legend, a remembered legend that was followed by
## more non-legend content is reported.
4102 wakaba 1.41 if ($element_state->{has_legend_at_first}) {
4103     #
4104     } elsif ($element_state->{has_legend}) {
4105     if ($element_state->{has_non_legend}) {
4106     $self->{onerror}->(node => $element_state->{has_legend},
4107 wakaba 1.35 type => 'element not allowed:figure legend',
4108 wakaba 1.104 level => $self->{level}->{must});
4109 wakaba 1.35 }
4110     }
4111 wakaba 1.41
## Finish with the flow content checker's own end-of-element check.
4112 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
4113 wakaba 1.41 ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
4114 wakaba 1.35 },
4115     };
4116 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
4117 wakaba 1.1
## Attribute checker used where real checking is not implemented: it
## reports the attribute as an 'unknown attribute' at the checker's
## |uncertain| level.
my $AttrCheckerNotImplemented = sub {
  my ($checker, $attr_node) = @_;
  my $report = $checker->{onerror};
  $report->(node => $attr_node,
            type => 'unknown attribute',
            level => $checker->{level}->{uncertain});
};
4124    
## Checker definition for the HTML |img| element: an empty-content
## element whose attribute check also reports missing |alt| and |src|
## attributes and records the URI types of its URI attributes.
$Element->{$HTML_NS}{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Per-attribute syntax checkers.
    my $attr_checkers = {
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      alt => sub { }, ## NOTE: No syntactical requirement
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      ismap => sub {
        ## |ismap| is reported unless the |in_a_href| flag is set; the
        ## boolean attribute check is applied in either case.
        my ($self, $attr, $parent_item) = @_;
        unless ($self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{level}->{must});
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      longdesc => $HTMLURIAttrChecker,
      ## TODO: HTML4 |name|
      height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    };

    ## Per-attribute feature status.
    my $attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      src => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    };

    $GetHTMLAttrsChecker->($attr_checkers, $attr_status)
        ->($self, $item, $element_state);

    ## Missing |alt| is reported at the |should| level, missing |src| at
    ## the |must| level, in that order.
    ## TODO: ... (carried over from the |alt| check)
    for my $required ([alt => 'should'], [src => 'must']) {
      my ($attr_ln, $level_key) = @$required;
      next if $item->{node}->has_attribute_ns (undef, $attr_ln);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $attr_ln,
                         level => $self->{level}->{$level_key});
    }

    ## TODO: external resource check

    $element_state->{uri_info}{src}{type}{embedded} = 1;
    $element_state->{uri_info}{lowsrc}{type}{embedded} = 1;
    $element_state->{uri_info}{dynsrc}{type}{embedded} = 1;
    $element_state->{uri_info}{longdesc}{type}{cite} = 1;
  },
}; # img
4193    
## Checker definition for the HTML |iframe| element: the text checker
## plus element-specific attribute checkers and status, and URI type
## bookkeeping at element start.
##
## The boolean attribute is registered under its HTML5 name |seamless|;
## it was previously misspelled |seemless| in both tables, so the real
## attribute was not recognised and the misspelling was accepted.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    name => $HTMLBrowsingContextNameAttrChecker,
    sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
      'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
    }),
    seamless => $GetHTMLBooleanAttrChecker->('seamless'),
    src => $HTMLURIAttrChecker,
    width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sandbox => FEATURE_HTML5_WD,
    scrolling => FEATURE_M12N10_REC,
    seamless => FEATURE_HTML5_WD,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    ## Records, per attribute name, the URI type flags for this element.
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
4235    
## Checker definition for the HTML |embed| element: an empty-content
## element with a hand-written attribute check, because any suitably
## named local attribute is accepted on it (see the loop below).
4236 wakaba 1.1 $Element->{$HTML_NS}->{embed} = {
4237 wakaba 1.40 %HTMLEmptyChecker,
4238 wakaba 1.98 status => FEATURE_HTML5_WD,
4239 wakaba 1.40 check_attrs => sub {
4240     my ($self, $item, $element_state) = @_;
4241 wakaba 1.1 my $has_src;
## Resolve a checker and a status for every attribute on the element.
4242 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
4243 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
4244     $attr_ns = '' unless defined $attr_ns;
4245     my $attr_ln = $attr->manakai_local_name;
4246     my $checker;
4247 wakaba 1.73
## The initial status is looked up by local name only, before the
## namespace is examined.
4248     my $status = {
4249     %HTMLAttrStatus,
4250 wakaba 1.153 height => FEATURE_HTML5_LC,
4251 wakaba 1.98 src => FEATURE_HTML5_WD,
4252     type => FEATURE_HTML5_WD,
4253 wakaba 1.153 width => FEATURE_HTML5_LC,
4254 wakaba 1.73 }->{$attr_ln};
4255    
## Attributes in no namespace: pick the checker by local name.
4256 wakaba 1.1 if ($attr_ns eq '') {
4257     if ($attr_ln eq 'src') {
4258     $checker = $HTMLURIAttrChecker;
4259     $has_src = 1;
4260     } elsif ($attr_ln eq 'type') {
4261     $checker = $HTMLIMTAttrChecker;
4262 wakaba 1.92 } elsif ($attr_ln eq 'width' or $attr_ln eq 'height') {
4263     $checker = $AttrCheckerNotImplemented; ## TODO: because spec does not define them yet.
## |data-*| names containing no ASCII uppercase letter.
4264 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
4265     $attr_ln !~ /[A-Z]/) {
4266 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
4267     $status = $HTMLDatasetAttrStatus;
## Any other name that does not start with "xml" (in any letter case),
## has no ASCII uppercase letter and matches the NCName pattern below.
4268 wakaba 1.117 } elsif ($attr_ln !~ /^[Xx][Mm][Ll]/ and
4269 wakaba 1.118 $attr_ln !~ /[A-Z]/ and
4270 wakaba 1.117 $attr_ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
4271 wakaba 1.1 $checker = $HTMLAttrChecker->{$attr_ln}
4272     || sub { }; ## NOTE: Any local attribute is ok.
4273 wakaba 1.98 $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
4274 wakaba 1.117 } else {
4275     $checker = $HTMLAttrChecker->{$attr_ln};
4276 wakaba 1.1 }
4277     }
## Fall back to the per-namespace tables; the '' key is the entry used
## when there is none for the specific local name.
4278     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
4279 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
4280     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
4281     || $AttrStatus->{$attr_ns}->{''};
## Namespaced attributes with no status at all default to FEATURE_ALLOWED.
4282     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
4283 wakaba 1.62
## Run the checker if one was found.  A no-namespace attribute with
## neither checker nor status is skipped here; anything else without a
## checker is reported as unknown at the |uncertain| level.
4284 wakaba 1.1 if ($checker) {
4285 wakaba 1.66 $checker->($self, $attr, $item, $element_state);
4286 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
4287 wakaba 1.54 #
4288 wakaba 1.1 } else {
4289 wakaba 1.104 $self->{onerror}->(node => $attr,
4290     type => 'unknown attribute',
4291     level => $self->{level}->{uncertain});
4292 wakaba 1.50 ## ISSUE: No conformance criteria for global attributes in the spec
4293     }
4294    
## The resolved status is always passed on, whatever branch ran above.
4295 wakaba 1.82 $self->_attr_status_info ($attr, $status);
4296 wakaba 1.1 }
4297    
4298     unless ($has_src) {
4299 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4300 wakaba 1.104 type => 'attribute missing',
4301     text => 'src',
4302 wakaba 1.114 level => $self->{level}->{info});
4303     ## NOTE: <embed> without src="" is allowed since revision 1929.
4304     ## We issue an informational message since <embed> w/o src=""
4305     ## is likely an authoring error.
4306 wakaba 1.1 }
4307 wakaba 1.114
4308     ## TODO: external resource check
4309 wakaba 1.66
4310     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4311 wakaba 1.1 },
4312     };
4313    
4314 wakaba 1.49 ## TODO:
4315     ## {applet} FEATURE_M12N10_REC_DEPRECATED
4316     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4317    
## Checker definition for the HTML |object| element.
##
## Content model (see the NOTE below): |param| children first, then
## transparent flow content.  |$element_state->{has_non_param}| is set
## once anything other than a |param| has been seen, so that a later
## |param| can be reported as misplaced.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    ## Checks the attributes, requires at least one of |data| and |type|,
    ## and records the URI types of the URI attributes.
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      archive => $HTMLSpaceURIsAttrChecker,
          ## TODO: Relative to @codebase
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      classid => $HTMLURIAttrChecker,
      codebase => $HTMLURIAttrChecker,
      codetype => $HTMLIMTAttrChecker,
          ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
      data => $HTMLURIAttrChecker,
      declare => $GetHTMLBooleanAttrChecker->('declare'),
          ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
          ## [HTML4] but we don't know how to test this.
      form => $HTMLFormAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name => $HTMLBrowsingContextNameAttrChecker,
          ## NOTE: |name| attribute of the |object| element defines
          ## the name of the browsing context created by the element,
          ## if any, but is also used as the form control name of the
          ## form control provided by the plugin, if any.
      standby => sub {}, ## NOTE: %Text; in HTML4
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      'content-length' => FEATURE_XHTML2_ED,
      data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      form => FEATURE_HTML5_DEFAULT,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);
    ## An error is reported only when both |data| and |type| are absent.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type',
                           level => $self->{level}->{must});
      }
    }

    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  ## NOTE: param*, transparent (Flow)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      ## A disallowed child still ends the leading run of |param|s.
      ## (This used to set |has_non_legend|, a flag only the |figure|
      ## checker reads, so a following |param| went unreported.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## A |param| is only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    ## Significant text also ends the leading run of |param|s.
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    ## Propagates significance to the real parent's state.  An element
    ## without significant content is reported only when its node has no
    ## parent element.
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
4435 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4436     ## What about |<section><object data><style scoped></style>x</object></section>|?
4437     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4438 wakaba 1.1
## Checker definition for the HTML |param| element: an empty-content
## element on which both |name| and |value| attributes are required.
$Element->{$HTML_NS}{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## |name| and |value| accept any value.
    my $attr_checkers = {
      name => sub { },
      type => $HTMLIMTAttrChecker,
      value => sub { },
      valuetype => $GetHTMLEnumeratedAttrChecker->({
        data => 1, ref => 1, object => 1,
      }),
    };
    my $attr_status = {
      %HTMLAttrStatus,
      href => FEATURE_RDFA_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    };
    $GetHTMLAttrsChecker->($attr_checkers, $attr_status)->(@_);

    ## Each required attribute that is absent is reported at the |must|
    ## level, |name| before |value|.
    for my $required_ln ('name', 'value') {
      unless ($item->{node}->has_attribute_ns (undef, $required_ln)) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => $required_ln,
                           level => $self->{level}->{must});
      }
    }
  },
}; # param
4474    
## |video|: transparent content model, optionally preceded by |source|
## children.  |source| children are only accepted while the element has
## no |src| attribute and before any other content; if there is no
## |src| attribute, at least one |source| child is required.
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    poster => $HTMLURIAttrChecker,
    src => $HTMLURIAttrChecker,
    width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) }
         qw/autoplay controls height poster src width/),
    (map { ($_ => FEATURE_HTML5_AT_RISK) }
         qw/end loopend loopstart playcount start/),
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |source| children are allowed only when |src| is not specified.
    $element_state->{allow_source}
        = not $item->{node}->has_attribute_ns (undef, 'src');

    ## |-1| means "a |source| child is required but not yet seen".
    ## NOTE: It might be set true by |check_element|.
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;

    for my $attr_name (qw/src poster/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{embedded} = 1;
    }
    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_source = ($child_nsuri eq $HTML_NS and $child_ln eq 'source');
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor; nothing to check.
    } elsif ($is_source) {
      ## A |source| after other content, or alongside |src|, is an error.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:flow',
                         level => $self->{level}->{must})
          if not $element_state->{allow_source};
      $element_state->{has_source} = 1;
    } else {
      ## Any other child closes the leading run of |source| elements.
      delete $element_state->{allow_source};
      $HTMLFlowContentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Significant text also closes the leading run of |source| elements.
    delete $element_state->{allow_source} if $has_significant;

    $HTMLFlowContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Still |-1|: no |src| attribute and no |source| child was seen.
    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'source',
                         level => $self->{level}->{must});
    }

    ## The remaining end-of-element checks are shared with |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
4556    
## |audio|: same definition as |video|, except for the attribute set
## (no |poster|, |height| or |width| entries).
$Element->{$HTML_NS}->{audio} = {
  %{ $Element->{$HTML_NS}->{video} },
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) } qw/autoplay controls src/),
    (map { ($_ => FEATURE_HTML5_AT_RISK) }
         qw/end loopend loopstart playcount start/),
  }),
};
4579    
## |source|: an empty element whose |src| attribute is required.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_each_attr = $GetHTMLAttrsChecker->({
      media => $HTMLMQAttrChecker,
      ## ISSUE: Negative or zero pixelratio=""
      pixelratio => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
    }, {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_LC) } qw/media pixelratio src type/),
    });
    $check_each_attr->(@_);

    if (not $item->{node}->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4607    
## |canvas|: transparent content model; |height| and |width| are
## non-negative integers with no further restriction on the value.
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_COMPLETE,
  check_attrs => $GetHTMLAttrsChecker->({
    (map { ($_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })) }
         qw/height width/),
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_COMPLETE) } qw/height width/),
  }),
};
4620    
## |map|: flow content.  The |name| attribute is required and must not
## be empty; if an |id| attribute is also present, it must be equal to
## |name|.  While a |map| is open, |$self->{flag}->{in_map}| collects
## information on the |area| elements found in it.
$Element->{$HTML_NS}->{map} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Set to |[value, attribute node]| once a |name| attribute is seen.
    my $name_info;

    my $check_each_attr = $GetHTMLAttrsChecker->({
      name => sub {
        my ($self, $attr) = @_;
        my $name = $attr->value;
        ## NOTE: Duplication is not non-conforming.
        ## NOTE: Space characters are not non-conforming.
        unless (length $name) {
          $self->{onerror}->(node => $attr,
                             type => 'empty attribute value',
                             level => $self->{level}->{must});
        }
        ## The first |map| with a given name wins.
        $self->{map}->{$name} ||= $attr;
        $name_info = [$name, $attr];
      },
    }, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw/onclick ondblclick onmousedown onmouseup onmouseover
              onmousemove onmouseout onkeypress onkeydown onkeyup/),
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    });
    $check_each_attr->(@_);

    if (not $name_info) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'name',
                         level => $self->{level}->{must});
    } else {
      my $id = $item->{node}->get_attribute_ns (undef, 'id');
      if (defined $id and $name_info->[0] ne $id) {
        $self->{onerror}
            ->(node => $item->{node}->get_attribute_node_ns (undef, 'id'),
               type => 'id ne name',
               level => $self->{level}->{must});
      }
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Remember the outer value so that |check_end| can restore it, then
    ## push a fresh hash for this |map| element.
    $element_state->{in_map_original} = $self->{flag}->{in_map};
    $self->{flag}->{in_map} = [@{$self->{flag}->{in_map} or []}, {}];
    ## NOTE: |{in_map}| is a reference to an array of hash references,
    ## one hash per currently open |map| element.  Each hash maps a
    ## resource (## TODO: use absolute URL) linked from the |area|
    ## elements processed so far in that |map| to:
    ##   - zero (|0|), if some |area| element for the resource has a
    ##     non-empty |alt| attribute; or
    ##   - otherwise, an array reference listing what was recorded for
    ##     every |area| element for the resource that has an empty
    ##     |alt=""| attribute.

    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Report every empty |alt=""| whose resource never got a non-empty
    ## |alt| within this |map|.  Entries set to |0| are skipped.
    for my $empty_alts (grep { $_ }
                        values %{$self->{flag}->{in_map}->[-1]}) {
      for my $node (@$empty_alts) {
        $self->{onerror}->(type => 'empty area alt',
                           node => $node,
                           level => $self->{level}->{html5_no_may});
      }
    }

    $self->{flag}->{in_map} = $element_state->{in_map_original};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
4719    
## |area|: an empty element, allowed only inside a |map| element (or as
## a root element without a parent element).
##
## |check_attrs| does its own attribute dispatch, rather than using
## |$GetHTMLAttrsChecker|, because several attributes have to be
## validated in combination:
##   - |href| requires |alt|; without |href|, the hyperlink attributes
##     and |alt| are not allowed;
##   - the number and range of the |coords| values depend on |shape|.
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Null-namespace attributes that have an |area|-specific checker,
    ## keyed by local name.
    my %attr;

    ## Array reference of the |coords| numbers; left undefined if the
    ## attribute is missing or has a syntax error.
    my $coords;

    ## Both tables are the same for every attribute of the element, so
    ## they are built once per call instead of once per attribute.
    my %area_attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_WD,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_WD,
      nohref => FEATURE_M12N10_REC,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_WD,
      rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
      shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD,
    );
    my %area_attr_checker = (
      accesskey => $HTMLAccesskeyAttrChecker,
      alt => sub {
        ## NOTE: Checked later.
      },
      shape => $GetHTMLEnumeratedAttrChecker->({
        circ => -1, circle => 1,
        default => 1,
        poly => 1, polygon => -1,
        rect => 1, rectangle => -1,
      }),
      coords => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        ## A comma-separated list of (possibly negative) integers.
        if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
          $coords = [split /,/, $value];
        } else {
          $self->{onerror}->(node => $attr,
                             type => 'coords:syntax error',
                             level => $self->{level}->{must});
        }
      },
      nohref => $GetHTMLBooleanAttrChecker->('nohref'),
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    );

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $status = $area_attr_status{$attr_ln};
        $checker = $area_attr_checker{$attr_ln};
        if ($checker) {
          ## Remembered for the combination checks below.
          $attr{$attr_ln} = $attr;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the namespace-wide tables.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      if (defined $attr{alt}) {
        my $url = $attr{href}->value; ## TODO: resolve
        if (length $attr{alt}->value) {
          ## A non-empty |alt| settles this resource for every open
          ## |map| element.
          for my $areas_of_map (@{$self->{flag}->{in_map} or []}) {
            $areas_of_map->{$url} = 0;
          }
        } else {
          ## NOTE: Empty |alt=""|.  If there is another |area| element
          ## with the same |href=""| and that |area| element's
          ## |alt=""| attribute is not an empty string, then this
          ## is conforming.
          for my $areas_of_map (@{$self->{flag}->{in_map} or []}) {
            ## Already settled by a non-empty |alt|.
            next if exists $areas_of_map->{$url}
                and not $areas_of_map->{$url};
            push @{$areas_of_map->{$url} ||= []}, $attr{alt};
          }
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => 'alt',
                           level => $self->{level}->{must});
      }
    } else {
      ## Without |href|, the hyperlink attributes and |alt| are errors.
      for my $attr_name (qw/target ping rel media hreflang type alt/) {
        next unless defined $attr{$attr_name};
        $self->{onerror}->(node => $attr{$attr_name},
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## Normalize |shape| to its canonical keyword.  A missing or unknown
    ## value is treated as |rectangle|.
    my $shape = 'rectangle';
    if (defined $attr{shape}) {
      ## Only ASCII letters are lowercased before the keyword lookup.
      (my $keyword = $attr{shape}->value) =~ tr/A-Z/a-z/;
      $shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      }->{$keyword} || 'rectangle';
    }

    if ($shape eq 'default') {
      ## The default shape takes no coordinates at all.
      if (defined $attr{coords}) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    } elsif (not defined $attr{coords}) {
      ## Every other shape requires |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'coords',
                         level => $self->{level}->{must});
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      ## Three numbers; the third one must not be negative.
      if (@$coords == 3) {
        if ($coords->[2] < 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range',
                             index => 2,
                             value => $coords->[2],
                             level => $self->{level}->{must});
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number not 3',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      }
    } elsif ($shape eq 'polygon') {
      ## An even number of values, six or more.
      if (@$coords >= 6) {
        unless (@$coords % 2 == 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:number not even',
                             text => 0+@$coords,
                             level => $self->{level}->{must});
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number lt 6',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      }
    } elsif ($shape eq 'rectangle') {
      ## Four numbers; the first must be less than the third and the
      ## second less than the fourth.
      if (@$coords == 4) {
        for my $index (0, 1) {
          next if $coords->[$index] < $coords->[$index + 2];
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range',
                             index => $index,
                             value => $coords->[$index],
                             level => $self->{level}->{must});
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number not 4',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## An |area| with a parent element must be within an open |map|.
    unless ($self->{flag}->{in_map} or
            not $item->{node}->manakai_parent_element) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element not allowed:area',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
4968    
## Content model of the |table| element as a state machine.  The outer
## key is the current phase; the inner key is the local name of an
## HTML-namespace child element that is allowed in that phase; the value
## is the phase entered after that child.  A |tfoot| is additionally
## rejected by |check_child_element| once one has already been seen.
##
## A |tfoot| may come either before the body ("after leading tfoot") or
## after it ("after tfoot").  After a leading |tfoot| the body may
## consist of |tbody| elements or of |tr| elements.
my $HTMLTableNextPhase = {
  'before caption' => {
    caption => 'in colgroup', colgroup => 'in colgroup',
    thead => 'after thead', tfoot => 'after leading tfoot',
    tbody => 'in tbodys', tr => 'in trs',
  },
  'in colgroup' => {
    colgroup => 'in colgroup',
    thead => 'after thead', tfoot => 'after leading tfoot',
    tbody => 'in tbodys', tr => 'in trs',
  },
  'after thead' => {
    tfoot => 'after leading tfoot',
    tbody => 'in tbodys', tr => 'in trs',
  },
  'after leading tfoot' => {
    tbody => 'in tbodys', tr => 'in trs',
  },
  'in tbodys' => {
    tbody => 'in tbodys', tfoot => 'after tfoot',
  },
  'in trs' => {
    tr => 'in trs', tfoot => 'after tfoot',
  },
  'after tfoot' => {},
};

## |table|: children must follow the content model encoded in
## |$HTMLTableNextPhase|; significant text is not allowed.  At the end
## of the element the table model is built by |Whatpm::HTMLTable| and
## appended to |$self->{return}->{table}|.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cellpadding => $HTMLLengthAttrChecker,
    cellspacing => $HTMLLengthAttrChecker,
    frame => $GetHTMLEnumeratedAttrChecker->({
      void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
      lhs => 1, rhs => 1, box => 1, border => 1,
    }),
    rules => $GetHTMLEnumeratedAttrChecker->({
      none => 1, groups => 1, rows => 1, cols => 1, all => 1,
    }),
    summary => sub {}, ## NOTE: %Text; in HTML4.
    ## NOTE: %Length; in HTML4 (it is |border| that is %Pixels;), so
    ## a percentage such as |width="100%"| is a valid value.
    width => $HTMLLengthAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    cols => FEATURE_RFC1942,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';

    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      my $allowed = $HTMLTableNextPhase->{$element_state->{phase}}
          or die "check_child_element: Bad |table| phase: $element_state->{phase}";

      ## Only HTML-namespace children can be part of the content model.
      my $next_phase = $child_nsuri eq $HTML_NS
          ? $allowed->{$child_ln} : undef;

      ## There can be only one |tfoot| child in total.
      my $is_tfoot = $child_ln eq 'tfoot';
      if (defined $next_phase and
          not ($is_tfoot and $element_state->{has_tfoot})) {
        $element_state->{phase} = $next_phase;
        $element_state->{has_tfoot} = 1 if $is_tfoot;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    my $table = Whatpm::HTMLTable->form_table ($item->{node}, sub {
      $self->{onerror}->(@_);
    }, $self->{level});
    Whatpm::HTMLTable->assign_header
        ($table, $self->{onerror}, $self->{level});
    push @{$self->{return}->{table}}, $table;

    $HTMLChecker{check_end}->(@_);
  },
};
5118    
## |caption|: flow content in which |table| is registered as a "minus"
## (excluded) element for the duration of the element.
$Element->{$HTML_NS}->{caption} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw/top bottom left right/
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Exclude |table| from the content of this element.
    my $excluded = {$HTML_NS => {table => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusion registered by |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # caption
5145 wakaba 1.1
## Attribute checkers for the HTML4 cell alignment attributes, shared by
## the table-related element definitions.
my %cellalign = (
  ## HTML4 %cellhalign;
  align => $GetHTMLEnumeratedAttrChecker->({
    map { ($_ => 1) } qw/left center right justify char/
  }),
  char => sub {
    my ($self, $attr) = @_;

    ## NOTE: "character" or |%Character;| in HTML4.

    ## The value must be exactly one character long.
    return if length ($attr->value) == 1;
    $self->{onerror}->(node => $attr, type => 'char:syntax error',
                       level => $self->{level}->{html4_fact});
  },
  charoff => $HTMLLengthAttrChecker,

  ## HTML4 %cellvalign;
  valign => $GetHTMLEnumeratedAttrChecker->({
    map { ($_ => 1) } qw/top middle bottom baseline/
  }),
);
5169    
## |colgroup|: only |col| children are allowed, and no significant text.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    ## |span| must be greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) }
         qw/align char charoff valign width/),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_col = ($child_nsuri eq $HTML_NS and $child_ln eq 'col');
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
                  $is_col)) {
      ## Neither explicitly allowed by an ancestor nor a |col| element.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
5216    
## |col| element.  Content checking comes entirely from
## |%HTMLEmptyChecker|; only the status and the attribute tables are
## specific to this element.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    ## The extra predicate accepts only values greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    span    => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    valign  => FEATURE_M12N10_REC,
    width   => FEATURE_M12N10_REC,
  }),
};
5235    
## |tbody| element.  Apart from elements listed in |plus_elements|,
## only HTML |tr| children are accepted; significant text is reported
## as an error.  The definition is copied for |thead| and |tfoot|.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    valign  => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
      ## |tr| is the only element of the content model.
    } else {
      $error_type = 'element not allowed';
    }

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Text that is not significant is ignored.
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
5275    
## |thead| and |tfoot| each get their own shallow copy of the |tbody|
## definition.
for my $row_group (qw(thead tfoot)) {
  $Element->{$HTML_NS}->{$row_group} = {
    %{$Element->{$HTML_NS}->{tbody}},
  };
}
5283    
## |tr| element.  Apart from elements listed in |plus_elements|, only
## HTML |td| and |th| children are accepted; significant text is
## reported as an error.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    bgcolor => $HTMLColorAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    valign  => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted.
    } elsif ($child_nsuri eq $HTML_NS and
             ($child_ln eq 'td' or $child_ln eq 'th')) {
      ## Table cells are the only elements of the content model.
    } else {
      $error_type = 'element not allowed';
    }

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Text that is not significant is ignored.
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
5326    
## |td| element.  Content checking comes from |%HTMLFlowContentChecker|;
## this definition adds the status and the attribute tables.
$Element->{$HTML_NS}->{td} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    ## The extra predicate accepts only values greater than zero.
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    headers => sub {
      ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
      ## Though that method does not check the |headers| attribute of a
      ## |td| element if the element does not form a table, in that case
      ## the |td| element is non-conforming anyway.
    },
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    ## The extra predicate accepts only values greater than zero.
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->({
      row => 1, col => 1, rowgroup => 1, colgroup => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    abbr    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    align   => FEATURE_M12N10_REC,
    axis    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    headers => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    height  => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    nowrap  => FEATURE_M12N10_REC_DEPRECATED,
    rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    scope   => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    valign  => FEATURE_M12N10_REC,
    width   => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
5366    
## |th| element.  Content checking comes from
## |%HTMLPhrasingContentChecker|; this definition adds the status and
## the attribute tables.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    ## The extra predicate accepts only values greater than zero.
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    ## TODO: HTML4(?) |headers|
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    ## The extra predicate accepts only values greater than zero.
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->({
      row => 1, col => 1, rowgroup => 1, colgroup => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    abbr    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    align   => FEATURE_M12N10_REC,
    axis    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    headers => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    height  => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    nowrap  => FEATURE_M12N10_REC_DEPRECATED,
    rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    scope   => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    valign  => FEATURE_M12N10_REC,
    width   => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
5401    
## |form| element.  Flow content in which a nested |form| is put into
## the "minus" set for the duration of the element.
$Element->{$HTML_NS}->{form} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $AcceptAttrChecker,
    'accept-charset' => $HTMLCharsetsAttrChecker,
    action => $HTMLURIAttrChecker, ## TODO: Warn if submission is not defined for the scheme
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    enctype => $GetHTMLEnumeratedAttrChecker->({
      'application/x-www-form-urlencoded' => 1,
      'multipart/form-data' => 1,
      'text/plain' => 1,
    }),
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    name => sub {
      ## The name must be non-empty and must not repeat a name already
      ## recorded in |$self->{form}|.
      my ($self, $attr) = @_;

      my $form_name = $attr->value;
      if ($form_name eq '') {
        $self->{onerror}->(type => 'empty form name',
                           node => $attr,
                           level => $self->{level}->{must});
      } elsif ($self->{form}->{$form_name}) {
        $self->{onerror}->(type => 'duplicate form name',
                           node => $attr,
                           value => $form_name,
                           level => $self->{level}->{must});
      } else {
        ## First occurrence: remember it for later duplicate checks.
        $self->{form}->{$form_name} = 1;
      }
    },
    novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
    ## TODO: Tests for following attrs:
    onformchange => $HTMLEventHandlerAttrChecker,
    onforminput => $HTMLEventHandlerAttrChecker,
    onreceived => $HTMLEventHandlerAttrChecker,
    replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
    target => $HTMLTargetAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
    'accept-charset' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    data => FEATURE_WF2,
    enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    novalidate => FEATURE_HTML5_DEFAULT,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput => FEATURE_WF2_INFORMATIVE,
    onreceived => FEATURE_WF2,
    onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    replace => FEATURE_WF2,
    sdapref => FEATURE_HTML20_RFC,
    sdasuff => FEATURE_HTML20_RFC,
    target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## Nested |form| elements are excluded until |check_end|.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {form => 1}});

    ## URI typing information: |action| is an "action" URI, the other
    ## attributes are "resource" URIs.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{action}->{type}->{action} = 1;
    for my $resource_attr (qw(data template ref)) {
      $uri_info->{$resource_attr}->{type}->{resource} = 1;
    }
    $element_state->{id_type} = 'form';
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Undo the exclusion installed by |check_start|, then run the
    ## generic flow content end check with the same arguments.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
5484    
## |fieldset| element.  The content model is a |legend| element
## followed by flow content; |has_legend| and |has_non_legend| in the
## element state record what has been seen so far.
$Element->{$HTML_NS}->{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    name => FEATURE_HTML5_DEFAULT,
  }),
  ## NOTE: legend, Flow
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted, state is left untouched.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## A |legend| is an error once any other content (or an earlier
      ## |legend|) has been seen.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:details legend',
                         level => $self->{level}->{must})
          if $element_state->{has_non_legend};
      @{$element_state}{qw(has_legend has_non_legend)} = (1, 1);
    } else {
      ## Anything else is checked as flow content.
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 if not $child_is_transparent;
      ## TODO:
      ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
      ## should be an error, since |object| is allowed as flow,
      ## therefore |details| part of the content model does not match.
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text counts as non-|legend| content.
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## The |legend| child is mandatory.
    if (not $element_state->{has_legend}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'legend',
                         level => $self->{level}->{must});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
  ## NOTE: This definition is partially reused by |details| element's
  ## checker.
};
5551    
5552     $Element->{$HTML_NS}->{input} = {
5553 wakaba 1.119 %HTMLEmptyChecker,
5554     status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5555 wakaba 1.140 check_attrs => sub {
5556     my ($self, $item, $element_state) = @_;
5557 wakaba 1.142
5558 wakaba 1.145 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5559 wakaba 1.142 $state = 'text' unless defined $state;
5560     $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5561    
5562 wakaba 1.140 for my $attr (@{$item->{node}->attributes}) {
5563     my $attr_ns = $attr->namespace_uri;
5564     $attr_ns = '' unless defined $attr_ns;
5565     my $attr_ln = $attr->manakai_local_name;
5566     my $checker;
5567     my $status;
5568     if ($attr_ns eq '') {
5569     $status =
5570     {
5571     %HTMLAttrStatus,
5572     %HTMLM12NCommonAttrStatus,
5573     accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5574     'accept-charset' => FEATURE_HTML2X_RFC,
5575     accesskey => FEATURE_M12N10_REC,
5576     action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5577     align => FEATURE_M12N10_REC_DEPRECATED,
5578     alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5579     autocomplete => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5580     autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5581     checked => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5582     datafld => FEATURE_HTML4_REC_RESERVED,
5583     dataformatas => FEATURE_HTML4_REC_RESERVED,
5584     datasrc => FEATURE_HTML4_REC_RESERVED,
5585     disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5586     enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5587     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5588 wakaba 1.150 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5589     FEATURE_XHTMLBASIC11_CR,
5590 wakaba 1.140 ismap => FEATURE_M12N10_REC,
5591 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5592 wakaba 1.140 list => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5593     max => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5594 wakaba 1.150 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X |
5595     FEATURE_M12N10_REC,
5596 wakaba 1.140 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5597     min => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5598 wakaba 1.156 multiple => FEATURE_HTML5_DEFAULT,
5599 wakaba 1.140 name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5600 wakaba 1.161 novalidate => FEATURE_HTML5_DEFAULT,
5601 wakaba 1.140 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5602     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5603     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5604     onformchange => FEATURE_WF2_INFORMATIVE,
5605     onforminput => FEATURE_WF2_INFORMATIVE,
5606     oninput => FEATURE_WF2,
5607     oninvalid => FEATURE_WF2,
5608     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5609     pattern => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5610 wakaba 1.156 placeholder => FEATURE_HTML5_DEFAULT,
5611 wakaba 1.140 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5612     replace => FEATURE_WF2,
5613     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5614     sdapref => FEATURE_HTML20_RFC,
5615 wakaba 1.154 size => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5616 wakaba 1.140 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5617     step => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5618     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5619     target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5620 wakaba 1.161 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO:dropped
5621 wakaba 1.140 type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5622     usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
5623     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5624     }->{$attr_ln};
5625    
5626     $checker =
5627     {
5628 wakaba 1.141 ## NOTE: Value of an empty string means that the attribute is only
5629     ## applicable for a specific set of states.
5630 wakaba 1.142 accept => '',
5631 wakaba 1.149 'accept-charset' => $HTMLCharsetsAttrChecker,
5632     ## NOTE: To which states it applies is not defined in RFC 2070.
5633 wakaba 1.150 accesskey => '', ## NOTE: Not applied to |hidden| [WF2].
5634 wakaba 1.142 action => '',
5635 wakaba 1.150 align => '',
5636 wakaba 1.141 alt => '',
5637 wakaba 1.142 autocomplete => '',
5638 wakaba 1.165 autofocus => $AutofocusAttrChecker,
5639     ## NOTE: <input type=hidden disabled> is not disallowed.
5640 wakaba 1.142 checked => '',
5641     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
5642 wakaba 1.165 ## NOTE: <input type=hidden disabled> is not disallowed.
5643 wakaba 1.142 enctype => '',
5644     form => $HTMLFormAttrChecker,
5645 wakaba 1.150 inputmode => '',
5646     ismap => '', ## NOTE: "MUST" be type=image [HTML4]
5647 wakaba 1.142 list => '',
5648     max => '',
5649     maxlength => '',
5650     method => '',
5651     min => '',
5652 wakaba 1.156 multiple => '',
5653 wakaba 1.165 name => $FormControlNameAttrChecker,
5654 wakaba 1.166 novalidate => '',
5655 wakaba 1.149 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
5656     onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
5657     oninput => $HTMLEventHandlerAttrChecker, # [WF2]
5658     oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
5659     ## TODO: tests for four attributes above
5660 wakaba 1.142 pattern => '',
5661 wakaba 1.156 placeholder => '',
5662 wakaba 1.142 readonly => '',
5663 wakaba 1.150 replace => '',
5664 wakaba 1.142 required => '',
5665     size => '',
5666     src => '',
5667     step => '',
5668     target => '',
5669 wakaba 1.140 type => $GetHTMLEnumeratedAttrChecker->({
5670 wakaba 1.156 hidden => 1, text => 1, search => 1, url => 1,
5671     email => 1, password => 1,
5672 wakaba 1.141 datetime => 1, date => 1, month => 1, week => 1, time => 1,
5673 wakaba 1.157 'datetime-local' => 1, number => 1, range => 1, color => 1,
5674     checkbox => 1,
5675 wakaba 1.141 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
5676     button => 1,
5677 wakaba 1.140 }),
5678 wakaba 1.151 usemap => '',
5679 wakaba 1.142 value => '',
5680 wakaba 1.140 }->{$attr_ln};
5681 wakaba 1.141
5682     ## State-dependent checkers
5683     unless ($checker) {
5684     if ($state eq 'hidden') {
5685     $checker =
5686     {
5687 wakaba 1.142 value => sub {
5688     my ($self, $attr, $item, $element_state) = @_;
5689 wakaba 1.145 my $name = $item->{node}->get_attribute_ns (undef, 'name');
5690 wakaba 1.142 if (defined $name and $name eq '_charset_') { ## case-sensitive
5691     $self->{onerror}->(node => $attr,
5692     type => '_charset_ value',
5693     level => $self->{level}->{must});
5694     }
5695     },
5696 wakaba 1.141 }->{$attr_ln} || $checker;
5697 wakaba 1.142 ## TODO: Warn if no name attribute?
5698     ## TODO: Warn if name!=_charset_ and no value attribute?
5699 wakaba 1.168 } elsif ({
5700     datetime => 1, date => 1, month => 1, time => 1,
5701     week => 1, 'datetime-local' => 1,
5702     }->{$state}) {
5703     my $v = {
5704     datetime => ['global_date_and_time_string'],
5705     date => ['date_string'],
5706     month => ['month_string'],
5707     week => ['week_string'],
5708     time => ['time_string'],
5709     'datetime-local' => ['local_date_and_time_string'],
5710     }->{$state};
5711 wakaba 1.144 $checker =
5712     {
5713 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5714 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5715     on => 1, off => 1,
5716     }),
5717 wakaba 1.158 list => $ListAttrChecker,
5718 wakaba 1.168 min => $GetDateTimeAttrChecker->($v->[0]),
5719     max => $GetDateTimeAttrChecker->($v->[0]),
5720 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5721 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5722 wakaba 1.148 step => $StepAttrChecker,
5723 wakaba 1.168 value => $GetDateTimeAttrChecker->($v->[0]),
5724 wakaba 1.144 }->{$attr_ln} || $checker;
5725     } elsif ($state eq 'number') {
5726     $checker =
5727     {
5728 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5729 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5730     on => 1, off => 1,
5731     }),
5732 wakaba 1.158 list => $ListAttrChecker,
5733 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5734     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5735 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5736 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5737 wakaba 1.148 step => $StepAttrChecker,
5738 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5739 wakaba 1.144 }->{$attr_ln} || $checker;
5740     } elsif ($state eq 'range') {
5741     $checker =
5742     {
5743 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5744 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5745     on => 1, off => 1,
5746     }),
5747 wakaba 1.158 list => $ListAttrChecker,
5748 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5749     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5750 wakaba 1.148 step => $StepAttrChecker,
5751 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5752 wakaba 1.144 }->{$attr_ln} || $checker;
5753 wakaba 1.157 } elsif ($state eq 'color') {
5754     $checker =
5755     {
5756     accesskey => $HTMLAccesskeyAttrChecker,
5757     autocomplete => $GetHTMLEnumeratedAttrChecker->({
5758     on => 1, off => 1,
5759     }),
5760 wakaba 1.158 list => $ListAttrChecker,
5761 wakaba 1.157 value => sub {
5762     my ($self, $attr) = @_;
5763     unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
5764     $self->{onerror}->(node => $attr,
5765     type => 'scolor:syntax error', ## TODOC: type
5766     level => $self->{level}->{must});
5767     }
5768     },
5769     }->{$attr_ln} || $checker;
5770 wakaba 1.144 } elsif ($state eq 'checkbox' or $state eq 'radio') {
5771     $checker =
5772     {
5773 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5774 wakaba 1.149 checked => $GetHTMLBooleanAttrChecker->('checked'),
5775     ## TODO: tests
5776 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5777 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5778     }->{$attr_ln} || $checker;
5779     ## TODO: There MUST be another input type=radio with same
5780     ## name (Radio state).
5781     ## ISSUE: There should be exactly one type=radio with checked?
5782     } elsif ($state eq 'file') {
5783     $checker =
5784     {
5785 wakaba 1.161 accept => $AcceptAttrChecker,
5786 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5787 wakaba 1.168 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
5788 wakaba 1.159 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
5789 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5790 wakaba 1.144 }->{$attr_ln} || $checker;
5791     } elsif ($state eq 'submit') {
5792     $checker =
5793     {
5794 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5795 wakaba 1.149 action => $HTMLURIAttrChecker,
5796 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5797     'application/x-www-form-urlencoded' => 1,
5798     'multipart/form-data' => 1,
5799     'text/plain' => 1,
5800     }),
5801 wakaba 1.149 method => $GetHTMLEnumeratedAttrChecker->({
5802     get => 1, post => 1, put => 1, delete => 1,
5803     }),
5804 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5805 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5806     document => 1, values => 1,
5807     }),
5808     target => $HTMLTargetAttrChecker,
5809 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5810     }->{$attr_ln} || $checker;
5811     } elsif ($state eq 'image') {
5812     $checker =
5813     {
5814 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5815 wakaba 1.149 action => $HTMLURIAttrChecker,
5816     align => $GetHTMLEnumeratedAttrChecker->({
5817     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
5818     }),
5819 wakaba 1.144 alt => sub {
5820     my ($self, $attr) = @_;
5821     my $value = $attr->value;
5822     unless (length $value) {
5823     $self->{onerror}->(node => $attr,
5824     type => 'empty anchor image alt',
5825     level => $self->{level}->{must});
5826     }
5827     },
5828 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5829     'application/x-www-form-urlencoded' => 1,
5830     'multipart/form-data' => 1,
5831     'text/plain' => 1,
5832     }),
5833 wakaba 1.149 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
5834     method => $GetHTMLEnumeratedAttrChecker->({
5835     get => 1, post => 1, put => 1, delete => 1,
5836     }),
5837 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5838 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5839     document => 1, values => 1,
5840     }),
5841 wakaba 1.144 src => $HTMLURIAttrChecker,
5842     ## TODO: There is requirements on the referenced resource.
5843 wakaba 1.149 target => $HTMLTargetAttrChecker,
5844     usemap => $HTMLUsemapAttrChecker,
5845 wakaba 1.144 }->{$attr_ln} || $checker;
5846     ## TODO: alt & src are required.
5847     } elsif ({
5848     reset => 1, button => 1,
5849     ## NOTE: From Web Forms 2.0:
5850     remove => 1, 'move-up' => 1, 'move-down' => 1,
5851     add => 1,
5852     }->{$state}) {
5853     $checker =
5854     {
5855 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5856 wakaba 1.144 ## NOTE: According to Web Forms 2.0, |input| attribute
5857     ## has |template| attribute to support the |add| button
5858     ## type (as part of the repetition template feature). It
5859     ## conflicts with the |template| global attribute
5860     ## introduced as part of the data template feature.
5861     ## NOTE: |template| attribute as defined in Web Forms 2.0
5862     ## has no author requirement.
5863     value => sub { }, ## NOTE: No restriction.
5864     }->{$attr_ln} || $checker;
5865 wakaba 1.156 } else { # Text, Search, E-mail, URL, Password
5866 wakaba 1.141 $checker =
5867     {
5868 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5869 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5870     on => 1, off => 1,
5871     }),
5872 wakaba 1.149 ## TODO: inputmode [WF2]
5873 wakaba 1.158 list => $ListAttrChecker,
5874 wakaba 1.147 maxlength => sub {
5875     my ($self, $attr, $item, $element_state) = @_;
5876    
5877     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
5878    
5879 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
5880 wakaba 1.147 ## NOTE: Applying the rules for parsing non-negative
5881     ## integers results in a number.
5882     my $max_allowed_value_length = 0+$1;
5883    
5884     my $value = $item->{node}->get_attribute_ns (undef, 'value');
5885     if (defined $value) {
5886     my $codepoint_length = length $value;
5887 wakaba 1.162
5888 wakaba 1.147 if ($codepoint_length > $max_allowed_value_length) {
5889     $self->{onerror}
5890     ->(node => $item->{node}
5891     ->get_attribute_node_ns (undef, 'value'),
5892     type => 'value too long',
5893     level => $self->{level}->{must});
5894     }
5895     }
5896     }
5897     },
5898 wakaba 1.160 pattern => $PatternAttrChecker,
5899 wakaba 1.159 placeholder => sub {
5900     my ($self, $attr) = @_;
5901     if ($attr->value =~ /[\x0D\x0A]/) {
5902     $self->{onerror}->(node => $attr,
5903     type => 'newline in value', ## TODOC: type
5904     level => $self->{level}->{must});
5905     }
5906     },
5907 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5908 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5909 wakaba 1.147 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
5910 wakaba 1.143 value => sub {
5911 wakaba 1.156 my ($self, $attr, $item, $element_state) = @_;
5912     if ($state eq 'url') {
5913     $HTMLURIAttrChecker->(@_);
5914     } elsif ($state eq 'email') {
5915     if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
5916     my @addr = split /,/, $attr->value, -1;
5917     @addr = ('') unless @addr;
5918     for (@addr) {
5919 wakaba 1.165 s/\A[\x09\x0A\x0C\x0D\x20]+//;
5920     s/[\x09\x0A\x0C\x0D\x20]\z//;
5921 wakaba 1.156 unless (/\A$ValidEmailAddress\z/) {
5922     $self->{onerror}->(node => $attr,
5923     type => 'email:syntax error', ## TODO: type
5924     value => $_,
5925     level => $self->{level}->{must});
5926     }
5927     }
5928     } else {
5929     unless ($attr->value =~ /\A$ValidEmailAddress\z/) {
5930     $self->{onerror}->(node => $attr,
5931     type => 'email:syntax error', ## TODO: type
5932     level => $self->{level}->{must});
5933     }
5934     }
5935     } else {
5936     if ($attr->value =~ /[\x0D\x0A]/) {
5937     $self->{onerror}->(node => $attr,
5938     type => 'newline in value', ## TODO: type
5939     level => $self->{level}->{must});
5940     }
5941     }
5942 wakaba 1.143 },
5943 wakaba 1.141 }->{$attr_ln} || $checker;
5944 wakaba 1.147 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
5945 wakaba 1.156 $checker = $GetHTMLBooleanAttrChecker->('multiple')
5946     if $state eq 'email' and $attr_ln eq 'multiple';
5947 wakaba 1.161
5948     if ($item->{node}->has_attribute_ns (undef, 'pattern') and
5949     not $item->{node}->has_attribute_ns (undef, 'title')) {
5950     $self->{onerror}->(node => $item->{node},
5951     type => 'attribute missing',
5952     text => 'title',
5953     level => $self->{level}->{should});
5954     }
5955 wakaba 1.141 }
5956     }
5957    
5958     if (defined $checker) {
5959     if ($checker eq '') {
5960     $checker = sub {
5961     my ($self, $attr) = @_;
5962     $self->{onerror}->(node => $attr,
5963     type => 'input attr not applicable',
5964     text => $state,
5965     level => $self->{level}->{must});
5966     };
5967     }
5968 wakaba 1.140 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
5969     $attr_ln !~ /[A-Z]/) {
5970     $checker = $HTMLDatasetAttrChecker;
5971     $status = $HTMLDatasetAttrStatus;
5972     } else {
5973     $checker = $HTMLAttrChecker->{$attr_ln};
5974     }
5975     }
5976     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
5977     || $AttrChecker->{$attr_ns}->{''};
5978     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
5979     || $AttrStatus->{$attr_ns}->{''};
5980     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
5981 wakaba 1.157
5982     ## TODOC: accesskey="" is also applied to type=search and type=color
5983 wakaba 1.140
5984     if ($checker) {
5985     $checker->($self, $attr, $item, $element_state) if ref $checker;
5986     } elsif ($attr_ns eq '' and not $status) {
5987     #
5988     } else {
5989     $self->{onerror}->(node => $attr,
5990     type => 'unknown attribute',
5991     level => $self->{level}->{uncertain});
5992     ## ISSUE: No conformance criteria for unknown attributes in the spec
5993     }
5994    
5995     $self->_attr_status_info ($attr, $status);
5996     }
5997 wakaba 1.168
5998     ## ISSUE: -0/+0
5999    
6000     if ($state eq 'range') {
6001     $element_state->{number_value}->{min} ||= 0;
6002     $element_state->{number_value}->{max} = 100
6003     unless defined $element_state->{number_value}->{max};
6004     }
6005    
6006     if (defined $element_state->{date_value}->{min} or
6007     defined $element_state->{date_value}->{max}) {
6008     my $min_value = $element_state->{date_value}->{min};
6009     my $max_value = $element_state->{date_value}->{max};
6010     my $value_value = $element_state->{date_value}->{value};
6011    
6012     if (defined $min_value and $min_value eq '' and
6013     (defined $max_value or defined $value_value)) {
6014     my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6015     $self->{onerror}->(node => $min,
6016     type => 'date value not supported', ## TODOC: type
6017     value => $min->value,
6018     level => $self->{level}->{unsupported});
6019     undef $min_value;
6020     }
6021     if (defined $max_value and $max_value eq '' and
6022     (defined $max_value or defined $value_value)) {
6023     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6024     $self->{onerror}->(node => $max,
6025     type => 'date value not supported', ## TODOC: type
6026     value => $max->value,
6027     level => $self->{level}->{unsupported});
6028     undef $max_value;
6029     }
6030     if (defined $value_value and $value_value eq '' and
6031     (defined $max_value or defined $min_value)) {
6032     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6033     $self->{onerror}->(node => $value,
6034     type => 'date value not supported', ## TODOC: type
6035     value => $value->value,
6036     level => $self->{level}->{unsupported});
6037     undef $value_value;
6038     }
6039    
6040     if (defined $min_value and defined $max_value) {
6041     if ($min_value->to_html5_number > $max_value->to_html5_number) {
6042     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6043     $self->{onerror}->(node => $max,
6044     type => 'max lt min', ## TODOC: type
6045     level => $self->{level}->{must});
6046     }
6047     }
6048    
6049     if (defined $min_value and defined $value_value) {
6050     if ($min_value->to_html5_number > $value_value->to_html5_number) {
6051     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6052     $self->{onerror}->(node => $value,
6053     type => 'value lt min', ## TODOC: type
6054     level => $self->{level}->{warn});
6055     ## NOTE: Not an error.
6056     }
6057     }
6058    
6059     if (defined $max_value and defined $value_value) {
6060     if ($max_value->to_html5_number < $value_value->to_html5_number) {
6061     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6062     $self->{onerror}->(node => $value,
6063     type => 'value gt max', ## TODOC: type
6064     level => $self->{level}->{warn});
6065     ## NOTE: Not an error.
6066     }
6067     }
6068     } elsif (defined $element_state->{number_value}->{min} or
6069     defined $element_state->{number_value}->{max}) {
6070     my $min_value = $element_state->{number_value}->{min};
6071     my $max_value = $element_state->{number_value}->{max};
6072     my $value_value = $element_state->{number_value}->{value};
6073    
6074     if (defined $min_value and defined $max_value) {
6075     if ($min_value > $max_value) {
6076     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6077     $self->{onerror}->(node => $max,
6078     type => 'max lt min', ## TODOC: type
6079     level => $self->{level}->{must});
6080     }
6081     }
6082    
6083     if (defined $min_value and defined $value_value) {
6084     if ($min_value > $value_value) {
6085     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6086     $self->{onerror}->(node => $value,
6087     type => 'value lt min', ## TODOC: type
6088     level => $self->{level}->{warn});
6089     ## NOTE: Not an error.
6090     }
6091     }
6092    
6093     if (defined $max_value and defined $value_value) {
6094     if ($max_value < $value_value) {
6095     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6096     $self->{onerror}->(node => $value,
6097     type => 'value gt max', ## TODOC: type
6098     level => $self->{level}->{warn});
6099     ## NOTE: Not an error.
6100     }
6101     }
6102     }
6103 wakaba 1.150
6104 wakaba 1.168 ## TODO: Warn unless value = min * x where x is an integer.
6105    
6106 wakaba 1.150 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6107     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6108     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6109     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6110     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6111 wakaba 1.140 },
6112 wakaba 1.66 check_start => sub {
6113     my ($self, $item, $element_state) = @_;
6114 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6115     $self->{onerror}->(node => $item->{node},
6116     type => 'multiple labelable fae',
6117     level => $self->{level}->{must});
6118     } else {
6119     $self->{flag}->{has_labelable} = 2;
6120     }
6121 wakaba 1.138
6122     $element_state->{id_type} = 'labelable';
6123 wakaba 1.66 },
6124 wakaba 1.52 };
6125    
6126 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
6127    
6128 wakaba 1.80 ## NOTE: "authors who are nesting repetition blocks should position such
6129     ## [repetition-block-related] buttons carefully to make clear which block a
6130 wakaba 1.150 ## button applies to." [WF2]: I have no idea how this can be tested.
6131 wakaba 1.80
## Definition of the HTML |button| element.  Content-model callbacks are
## taken from |%HTMLPhrasingContentChecker|; attribute checkers and
## attribute status flags are given below.
$Element->{$HTML_NS}->{button} = {
  %HTMLPhrasingContentChecker, ## ISSUE: -interactive?
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accesskey    => $HTMLAccesskeyAttrChecker,
    ## ISSUE: In HTML5, no "MUST NOT" for using |action|, |method|,
    ## |enctype|, |target|, and |novalidate| with non-|submit|-|type|
    ## |button| elements.
    action       => $HTMLURIAttrChecker,
    autofocus    => $AutofocusAttrChecker,
    disabled     => $GetHTMLBooleanAttrChecker->('disabled'),
    enctype      => $GetHTMLEnumeratedAttrChecker->(+{
      map { $_ => 1 } qw{
        application/x-www-form-urlencoded
        multipart/form-data
        text/plain
      }
    }),
    form         => $HTMLFormAttrChecker,
    method       => $GetHTMLEnumeratedAttrChecker->(+{
      map { $_ => 1 } qw/get post put delete/
    }),
    name         => $FormControlNameAttrChecker,
    novalidate   => $GetHTMLBooleanAttrChecker->('novalidate'),
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput  => $HTMLEventHandlerAttrChecker, ## TODO: tests
    replace      => $GetHTMLEnumeratedAttrChecker->(+{
      map { $_ => 1 } qw/document values/
    }),
    target       => $HTMLTargetAttrChecker,
    ## NOTE: According to Web Forms 2.0, the |button| element has a
    ## |template| attribute to support the |add| button type (as part of
    ## the repetition template feature).  It conflicts with the
    ## |template| global attribute introduced as part of the data
    ## template feature.
    ## NOTE: The |template| attribute as defined in Web Forms 2.0 has no
    ## author requirement.
    type         => $GetHTMLEnumeratedAttrChecker->(+{
      map { $_ => 1 } qw/button submit reset/
    }),
    value        => sub {}, ## NOTE: No restriction.
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey    => FEATURE_M12N10_REC,
    action       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    autofocus    => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    disabled     => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    enctype      => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form         => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    method       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    novalidate   => FEATURE_HTML5_DEFAULT,
    onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput  => FEATURE_WF2_INFORMATIVE,
    replace      => FEATURE_WF2,
    tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    template     => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO: dropped
    type         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    value        => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## When both the |has_label| and |has_labelable| flags are already
    ## set, this element is reported as an extra labelable element;
    ## otherwise the |has_labelable| flag is set to 2.
    if (not $self->{flag}->{has_label} or
        not $self->{flag}->{has_labelable}) {
      $self->{flag}->{has_labelable} = 2;
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "The value attribute must not be present unless the form
    ## [content] attribute is present.": Wrong?

    ## URI-valued attributes and the kind of reference each one makes.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/datasrc template ref/;

    $element_state->{id_type} = 'labelable';
  },
};
6216    
## Definition of the HTML |label| element.  While a |label| is open, the
## |has_label| / |has_labelable| flags in |$self->{flag}| are updated here
## and consulted by the |check_start| callbacks of labelable elements.
$Element->{$HTML_NS}->{label} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC
      | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->({
    accesskey => $HTMLAccesskeyAttrChecker,
    for       => sub {
      my ($self, $attr) = @_;

      ## NOTE: MUST be an ID of a labelable element.  The reference is
      ## only queued here, in |$self->{idref}|.
      push @{$self->{idref}}, ['labelable', $attr->value, $attr];
    },
    form      => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
    for       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    form      => FEATURE_HTML5_DEFAULT,
    lang      => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    onblur    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## A |label| must not contain another |label|.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {label => 1}});

    ## Remember the outer flag values so that |check_end| can restore
    ## them, then set the flags for this |label|.
    $element_state->{has_label_original} = $self->{flag}->{has_label};
    $element_state->{has_labelable_original} = $self->{flag}->{has_labelable};
    $self->{flag}->{has_label} = 1;
    if ($item->{node}->has_attribute_ns (undef, 'for')) {
      $self->{flag}->{has_labelable} = 1;
    } else {
      $self->{flag}->{has_labelable} = 0;
    }

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## 1 = has for="" but no labelable
    $self->{flag}->{has_labelable}
        = $element_state->{has_labelable_original}
            if $self->{flag}->{has_labelable} == 1;

    if (not $element_state->{has_label_original}) {
      delete $self->{flag}->{has_label};
    }
    ## TODO: Warn if no labelable descendant? <input type=hidden>?

    ## NOTE: |<label for=a><input id=a></label>| is non-conforming.

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
  ## TODO: Tests for <nest/> in <label>
};
6272    
## Definition of the HTML |select| element.  Only |option| and |optgroup|
## children (plus any "plus" elements) are accepted, and significant
## text is rejected.
$Element->{$HTML_NS}->{select} = {
  %HTMLChecker,
  ## ISSUE: HTML5 has no requirement like these:
  ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
  ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->({
    accesskey    => $HTMLAccesskeyAttrChecker,
    autofocus    => $AutofocusAttrChecker,
    disabled     => $GetHTMLBooleanAttrChecker->('disabled'),
    data         => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    form         => $HTMLFormAttrChecker,
    multiple     => $GetHTMLBooleanAttrChecker->('multiple'),
    name         => $FormControlNameAttrChecker,
    ## TODO: tests for on*
    onformchange => $HTMLEventHandlerAttrChecker,
    onforminput  => $HTMLEventHandlerAttrChecker,
    oninput      => $HTMLEventHandlerAttrChecker,
    oninvalid    => $HTMLEventHandlerAttrChecker,
    ## The integer must be greater than zero.
    size         => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey    => FEATURE_WF2,
    autofocus    => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    data         => FEATURE_WF2,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    disabled     => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    form         => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    multiple     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    name         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput  => FEATURE_WF2_INFORMATIVE,
    onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninput      => FEATURE_WF2,
    oninvalid    => FEATURE_WF2,
    sdaform      => FEATURE_HTML20_RFC,
    sdapref      => FEATURE_HTML20_RFC,
    size         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## When both the |has_label| and |has_labelable| flags are already
    ## set, this element is reported as an extra labelable element;
    ## otherwise the |has_labelable| flag is set to 2.
    if (not $self->{flag}->{has_label} or
        not $self->{flag}->{has_labelable}) {
      $self->{flag}->{has_labelable} = 2;
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/data datasrc template ref/;

    $element_state->{id_type} = 'labelable';
  },
  check_child_element => sub {
    ## NOTE: (option | optgroup)*

    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded by an ancestor.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln} and
             not ($child_nsuri eq $HTML_NS and
                  ($child_ln eq 'option' or $child_ln eq 'optgroup'))) {
      ## Neither a "plus" element nor an HTML |option| / |optgroup|.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only insignificant text is tolerated.
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
6367 wakaba 1.1
## Definition of the HTML |datalist| element.  Its content is either
## phrasing content or a sequence of |option| elements; which of the two
## applies is decided by the first significant child and recorded in
## |$element_state->{phase}|.
$Element->{$HTML_NS}->{datalist} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
  }, {
    %HTMLAttrStatus,
    data => FEATURE_WF2,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{phase} = 'any'; # any | phrasing | option

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/data template ref/;

    $element_state->{id_type} = 'datalist';
  },
  ## NOTE: phrasing | option*
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Always allowed.
    } elsif ($element_state->{phase} eq 'phrasing') {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:phrasing',
                         level => $self->{level}->{must})
          unless $HTMLPhrasingContent->{$child_nsuri}->{$child_ln};
    } elsif ($element_state->{phase} eq 'option') {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must})
          unless $child_nsuri eq $HTML_NS and $child_ln eq 'option';
    } elsif ($element_state->{phase} eq 'any') {
      ## The first child element decides the content model.
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{phase} = 'phrasing';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $element_state->{phase} = 'option';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      if ($element_state->{phase} eq 'any') {
        ## Significant text selects the phrasing content model.
        $element_state->{phase} = 'phrasing';
      } elsif ($element_state->{phase} ne 'phrasing') {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{phase} ne 'phrasing') {
      ## NOTE: Since the content model explicitly allows a |datalist|
      ## element being empty, we don't raise a "no significant content"
      ## error for this element when there is no element.  (We should
      ## raise an error for |<datalist><br></datalist>|, however.)
      ## NOTE: As a side effect, when the |datalist| element only
      ## contains non-conforming content, the |phase| flag has not
      ## changed from |any| and no "no significant content" error is
      ## raised either.
      $HTMLChecker{check_end}->(@_);
    } elsif ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'no significant content',
                         level => $self->{level}->{should});
    }
  },
};
6467 wakaba 1.49
## Definition of the HTML |optgroup| element.  The |label| attribute is
## required; only |option| children (plus any "plus" elements) are
## accepted, and significant text is rejected.
$Element->{$HTML_NS}->{optgroup} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label    => sub {},
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    label    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    ## |label| is a required attribute.
    $self->{onerror}->(node => $item->{node},
                       type => 'attribute missing',
                       text => 'label',
                       level => $self->{level}->{must})
        unless $item->{node}->has_attribute_ns (undef, 'label');
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded by an ancestor.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln} and
             not ($child_nsuri eq $HTML_NS and $child_ln eq 'option')) {
      ## Neither a "plus" element nor an HTML |option|.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only insignificant text is tolerated.
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
6516    
## Definition of the HTML |option| element.  Content-model callbacks are
## taken from |%HTMLTextChecker|; only the attribute checkers and the
## attribute status flags are specific to this element.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label    => sub {}, ## NOTE: No restriction.
    ## ISSUE: Not a "boolean attribute"
    selected => $GetHTMLBooleanAttrChecker->('selected'),
    value    => sub {}, ## NOTE: No restriction.
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    label    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform  => FEATURE_HTML20_RFC,
    sdapref  => FEATURE_HTML20_RFC,
    selected => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    value    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
};
6537 wakaba 1.49
6538 wakaba 1.52 $Element->{$HTML_NS}->{textarea} = {
6539     %HTMLTextChecker,
6540 wakaba 1.121 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6541 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6542 wakaba 1.164 accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
6543 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
6544 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6545 wakaba 1.164 cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6546 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6547 wakaba 1.136 form => $HTMLFormAttrChecker,
6548 wakaba 1.56 ## TODO: inputmode [WF2]
6549 wakaba 1.164 maxlength => sub {
6550     my ($self, $attr, $item, $element_state) = @_;
6551    
6552     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
6553    
6554 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
6555 wakaba 1.164 ## NOTE: Applying the rules for parsing non-negative integers
6556     ## results in a number.
6557     my $max_allowed_value_length = 0+$1;
6558    
6559     ## ISSUE: "The the purposes of this requirement," (typo)
6560    
6561     ## ISSUE: This constraint is applied w/o CRLF normalization to
6562     ## |value| attribute, but w/ CRLF normalization to
6563     ## concept-value.
6564     my $value = $item->{node}->text_content;
6565     if (defined $value) {
6566     my $codepoint_length = length $value;
6567    
6568     if ($codepoint_length > $max_allowed_value_length) {
6569     $self->{onerror}->(node => $item->{node},
6570     type => 'value too long',
6571     level => $self->{level}->{must});
6572     }
6573     }
6574     }
6575     },
6576 wakaba 1.165 name => $FormControlNameAttrChecker,
6577 wakaba 1.164 onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
6578     onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6579     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6580 wakaba 1.161 pattern => $PatternAttrChecker,
6581 wakaba 1.52 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6582 wakaba 1.56 required => $GetHTMLBooleanAttrChecker->('required'),
6583 wakaba 1.164 rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6584     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6585     oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
6586 wakaba 1.161 ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
6587 wakaba 1.56 wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
6588 wakaba 1.52 }, {
6589     %HTMLAttrStatus,
6590     %HTMLM12NCommonAttrStatus,
6591 wakaba 1.164 accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6592 wakaba 1.61 'accept-charset' => FEATURE_HTML2X_RFC,
6593 wakaba 1.52 accesskey => FEATURE_M12N10_REC,
6594 wakaba 1.121 autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6595     cols => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6596 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6597 wakaba 1.49 dataformatas => FEATURE_HTML4_REC_RESERVED,
6598     datasrc => FEATURE_HTML4_REC_RESERVED,
6599 wakaba 1.121 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6600     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6601 wakaba 1.164 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
6602 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6603 wakaba 1.121 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6604     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6605 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6606     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6607     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6608 wakaba 1.164 onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6609     onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6610     oninput => FEATURE_WF2, ## TODO: tests
6611     oninvalid => FEATURE_WF2, ## TODO: tests
6612 wakaba 1.52 onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6613 wakaba 1.161 pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6614 wakaba 1.121 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6615     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6616     rows => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6617 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6618     sdapref => FEATURE_HTML20_RFC,
6619 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6620 wakaba 1.121 wrap => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6621 wakaba 1.52 }),
6622 wakaba 1.66 check_start => sub {
6623     my ($self, $item, $element_state) = @_;
6624 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6625     $self->{onerror}->(node => $item->{node},
6626     type => 'multiple labelable fae',
6627     level => $self->{level}->{must});
6628     } else {
6629     $self->{flag}->{has_labelable} = 2;
6630     }
6631 wakaba 1.164
6632     $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6633     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6634     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6635    
6636     $element_state->{id_type} = 'labelable';
6637     },
6638     check_attrs2 => sub {
6639     my ($self, $item, $element_state) = @_;
6640 wakaba 1.66
6641 wakaba 1.161 if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6642     not $item->{node}->has_attribute_ns (undef, 'title')) {
6643     ## NOTE: WF2 (dropped by HTML5)
6644     $self->{onerror}->(node => $item->{node},
6645     type => 'attribute missing',
6646     text => 'title',
6647     level => $self->{level}->{should});
6648     }
6649    
6650 wakaba 1.164 unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
6651     my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
6652     if (defined $wrap) {
6653     $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
6654     if ($wrap eq 'hard') {
6655     $self->{onerror}->(node => $item->{node},
6656     type => 'attribute missing',
6657     text => 'cols',
6658     level => $self->{level}->{must});
6659     }
6660     }
6661     }
6662 wakaba 1.66 },
6663 wakaba 1.52 };
6664 wakaba 1.49
## Checker definition for the |output| element, layered on top of
## |%HTMLPhrasingContentChecker|.
$Element->{$HTML_NS}->{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |for|: "Unordered set of unique space-separated tokens".  The
    ## first occurrence of each token is queued in |$self->{idref}| as
    ## an |any| reference; every later occurrence of the same token is
    ## reported as a 'duplicate token' error.
    for => sub {
      my ($self, $attr) = @_;

      my %seen;
      my @tokens = grep { length $_ }
          split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
      for my $token (@tokens) {
        if ($seen{$token}) {
          $self->{onerror}->(node => $attr, type => 'duplicate token',
                             value => $token,
                             level => $self->{level}->{must});
        } else {
          $seen{$token} = 1;
          push @{$self->{idref}}, ['any', $token, $attr];
        }
      }
    },
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
  }, {
    ## Per-attribute feature status.
    %HTMLAttrStatus,
    for => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  }),
};
6701    
## Checker definition for the deprecated |isindex| element, layered on
## top of |%HTMLEmptyChecker|.
$Element->{$HTML_NS}->{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->({
    prompt => sub {}, ## NOTE: Text [M12N]
  }, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record how each URI-valued attribute is used: |action| as an
    ## action, |template| and |ref| as resources.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
6729 wakaba 1.49
## Checker definition for the |script| element, layered on top of
## |%HTMLChecker|.
##
## With a |src| attribute the element has to be empty.  Without it,
## the text content is collected in |$element_state->{text}| and, at
## the end of the element, handed to |$self->{onsubdoc}| together with
## the script type derived from |type| / |language|.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |charset| is only allowed together with |src|; its value is
    ## validated by |$HTMLCharsetChecker| in either case.
    charset => sub {
      my ($self, $attr) = @_;

      if (not $attr->owner_element->has_attribute_ns (undef, 'src')) {
        $self->{onerror}->(type => 'attribute not allowed',
                           node => $attr,
                           level => $self->{level}->{must});
      }

      $HTMLCharsetChecker->($attr->value, @_);
    },
    language => sub {}, ## NOTE: No syntax constraint according to HTML4.
    src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
  }, {
    %HTMLAttrStatus,
    async => FEATURE_HTML5_WD,
    charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    href => FEATURE_RDFA_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $script_el = $item->{node};

    if ($script_el->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $script_el->get_attribute_ns (undef, 'type');
      my $language = $script_el->get_attribute_ns (undef, 'language');

      ## An empty |type| or |language|, or the absence of both, means
      ## |text/javascript|.  A non-empty |type| is used as is.  A
      ## |language| without |type| maps to |text/<language>|.
      if ((defined $type and $type eq '') or
          (defined $language and $language eq '')) {
        $type = 'text/javascript';
      } elsif (not defined $type) {
        $type = defined $language
            ? 'text/' . $language : 'text/javascript';
      }

      ## Reduce a well-formed media type to lowercased |type/subtype|.
      if ($type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
        $type = "$1/$2";
        $type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
        ## TODO: Though we strip parameters here, they should not be
        ## ignored for the purpose of conformance checking...
      }
      $element_state->{script_type} = $type;
    }

    for my $attr_name (qw(src template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }

    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here.
    } elsif ($element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:empty',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($element_state->{must_be_empty} and $has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed:empty',
                         level => $self->{level}->{must});
    }
    ## The text is accumulated regardless, for |check_end|.
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if (not $element_state->{must_be_empty}) {
      my $script_type = $element_state->{script_type};
      if ($script_type =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML script lang',
                           text => $element_state->{script_type},
                           level => $self->{level}->{uncertain});
        ## ISSUE: Should we raise some kind of error for
        ## <script type="text/xml">aaaaa</script>?
        ## NOTE: ^^^ This is why we throw an "uncertain" error.
      } else {
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $element_state->{script_type},
                             is_char_string => 1});
      }

      $HTMLChecker{check_end}->(@_);
    }
  },
  ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
  ## NOTE: "When used to include script data, the script data must be embedded
  ## inline, the format of the data must be given using the type attribute,
  ## and the src attribute must not be specified." - not testable.
  ## TODO: It would be possible to err <script type=text/plain src=...>
};
6854 wakaba 1.25 ## ISSUE: Significant check and text child node
6855 wakaba 1.1
6856     ## NOTE: When script is disabled.
## Checker definition for the |noscript| element, layered on top of
## |%HTMLTransparentChecker|.  While |$self->{flag}->{in_head}| is set
## a restricted content model is applied; otherwise the transparent
## checker is used.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Reported when the owner document's |manakai_is_html| is false.
    if (not $item->{node}->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript',
                         level => $self->{level}->{must});
    }

    ## Outside |head|, nested |noscript| elements are excluded.
    if (not $self->{flag}->{in_head}) {
      $self->_add_minus_elements ($element_state,
                                  {$HTML_NS => {noscript => 1}});
    }

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if (not $self->{flag}->{in_head}) {
      $HTMLTransparentChecker{check_child_element}->(@_);
    } elsif ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
             $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
      ## |link| is always fine.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## |style| is fine unless it has the |scoped| attribute.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head noscript',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
      ## |meta| needs an |http-equiv| attribute whose value is
      ## anything other than |content-type|.
      my $http_equiv_attr
          = $child_el->get_attribute_node_ns (undef, 'http-equiv');
      ## TODO: case
      if (not $http_equiv_attr or
          lc ($http_equiv_attr->value) eq 'content-type') {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head noscript',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:head noscript',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if (not $self->{flag}->{in_head}) {
      $HTMLTransparentChecker{check_child_text}->(@_);
    } elsif ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    my $base = $self->{flag}->{in_head}
        ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
    $base->{check_end}->(@_);
  },
};
6948 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
6949 wakaba 1.1
## Checker definition for the |event-source| element (status:
## |FEATURE_HTML5_LC_DROPPED|), layered on top of |%HTMLEmptyChecker|.
$Element->{$HTML_NS}->{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_LC_DROPPED,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |src|, |template| and |ref| are all used as resource URIs.
    for my $attr_name (qw(src template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
6967    
## Checker definition for the |eventsource| element, layered on top of
## |%HTMLEmptyChecker|.
$Element->{$HTML_NS}->{eventsource} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |src|, |template| and |ref| are all used as resource URIs.
    for my $attr_name (qw(src template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
6985    
## Checker definition for the |details| element.  It starts from a
## copy of the |fieldset| definition and replaces |status| and
## |check_attrs|.
$Element->{$HTML_NS}->{details} = {
  %{$Element->{$HTML_NS}->{fieldset}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {open => $GetHTMLBooleanAttrChecker->('open')},
    {
      %HTMLAttrStatus,
      open => FEATURE_HTML5_LC,
    },
  ),
};
6996    
## Checker definition for the |datagrid| element, layered on top of
## |%HTMLFlowContentChecker|.  |a| and |datagrid| elements are
## excluded for the duration of the element.
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $excluded = {$HTML_NS => {a => 1, datagrid => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusion registered by |check_start|, then run the
    ## inherited end-of-element check.
    $self->_remove_minus_elements ($element_state);
    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
7024    
## Checker definition for the |command| element, layered on top of
## |%HTMLEmptyChecker|.
$Element->{$HTML_NS}->{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    ## |type| has to be exactly one of |command|, |checkbox| or
    ## |radio| (compared as is, without case folding).
    type => sub {
      my ($self, $attr) = @_;
      my %is_allowed = (command => 1, checkbox => 1, radio => 1);
      if (not $is_allowed{$attr->value}) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                           level => $self->{level}->{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_DROPPED, # HTML5 revision 3067
    disabled => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |icon| is used as an embedded URI, |template| and |ref| as
    ## resource URIs.
    $element_state->{uri_info}->{icon}->{type}->{embedded} = 1;
    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
7061    
## Checker definition for the |bb| element, layered on top of
## |%HTMLPhrasingContentChecker|.  Everything listed in
## |$HTMLInteractiveContent| is excluded for the duration of the
## element.
$Element->{$HTML_NS}->{bb} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1}),
  }, {
    %HTMLAttrStatus,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## NOTE(review): This delegates to the transparent checker although
    ## the element is built on |%HTMLPhrasingContentChecker| - confirm
    ## that this is intended.
    $HTMLTransparentChecker{check_end}->(@_);
  },
};
7085    
## Checker definition for the |menu| element.
##
## The content is either a list of |li| elements or phrasing content,
## never a mixture.  |$element_state->{phase}| starts as
## 'li or phrasing' and is fixed to 'li' or 'phrasing' by the first
## |li| child, phrasing child element or significant text; anything
## that contradicts the chosen phase is reported as an error.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
      ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## ISSUE: <menu id=""><p contextmenu=""> match?  (In the current
    ## implementation, it does not match.)
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_DROPPED,
    ## NOTE: This key used to be misspelled |compat|, which left the
    ## |compact| attribute checked above without any status.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';

    ## Remember the outer value of the |in_menu| flag so that
    ## |check_end| only clears it for the outermost |menu|.
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    $element_state->{id_type} = 'menu';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      if ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } elsif ($element_state->{phase} ne 'li') {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      if ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } elsif ($element_state->{phase} ne 'phrasing') {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      ## Significant text counts as phrasing content.
      if ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } elsif ($element_state->{phase} ne 'phrasing') {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
7178    
## Checker definition for the |datatemplate| element, layered on top
## of |%HTMLChecker|.  Only |rule| child elements (and elements in the
## current "plus" set) are accepted; significant text is an error.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here.
    } elsif (not ($child_nsuri eq $HTML_NS and $child_ln eq 'rule')) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  is_xml_root => 1,
};
7209    
## Checker definition for the |rule| element, layered on top of
## |%HTMLChecker|.  Child elements and text are not checked at all;
## |nest| is added to the "plus" set and the |in_rule| flag is set for
## the duration of the element.
$Element->{$HTML_NS}->{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    condition => $HTMLSelectorsAttrChecker,
    mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
  }, {
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});

    ## Remember the outer value of the flag so that |check_end| only
    ## clears it for the outermost |rule|.
    $element_state->{in_rule_original} = $self->{flag}->{in_rule};
    $self->{flag}->{in_rule} = 1;

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_plus_elements ($element_state);
    if (not $element_state->{in_rule_original}) {
      delete $self->{flag}->{in_rule};
    }

    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
7245    
## Checker definition for the |nest| element, layered on top of
## |%HTMLEmptyChecker|.
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    filter => $HTMLSelectorsAttrChecker,
    ## |mode| has to be a single non-empty token, i.e. one or more
    ## characters none of which is TAB, LF, FF, CR or SPACE.
    mode => sub {
      my ($self, $attr) = @_;
      unless ($attr->value =~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/) {
        $self->{onerror}->(node => $attr, type => 'mode:syntax error',
                           level => $self->{level}->{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
};
7265    
## Checker definition for the |legend| element.  Children are checked
## with the flow content checker when the parent's element state has
## |in_figure| set, and with the phrasing content checker otherwise.
## |figure| elements are excluded for the duration of the element.
$Element->{$HTML_NS}->{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accesskey => $HTMLAccesskeyAttrChecker,
#    align => $GetHTMLEnumeratedAttrChecker->({
#      top => 1, bottom => 1, left => 1, right => 1,
#    }),
    form => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
    form => FEATURE_HTML5_DROPPED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $base = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker : \%HTMLPhrasingContentChecker;
    $base->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    my $base = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker : \%HTMLPhrasingContentChecker;
    $base->{check_child_text}->(@_);
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded = {$HTML_NS => {figure => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # legend
7313 wakaba 1.1
## Checker definition for the |div| element, layered on top of
## |%HTMLFlowContentChecker|.
$Element->{$HTML_NS}->{div} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |datasrc|, |template| and |ref| are all used as resource URIs.
    for my $attr_name (qw(datasrc template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
7338    
## Checker definition for the deprecated |center| element, layered on
## top of |%HTMLFlowContentChecker|.  It has no element-specific
## attribute checkers.
$Element->{$HTML_NS}->{center} = {
  %HTMLFlowContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
7348    
7349 wakaba 1.1 $Element->{$HTML_NS}->{font} = { ## Checker definition for the |font| element in the HTML namespace.
7350 wakaba 1.40 %HTMLTransparentChecker, ## Start from the transparent checker; keys listed below replace same-named entries.
7351 wakaba 1.78 status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
7352 wakaba 1.70 check_attrs => $GetHTMLAttrsChecker->({
7353     ## TODO: Add value checkers for the HTML4 |size|, |color|, and |face| attributes.
7354 wakaba 1.49 }, { ## Second hash: attribute name => feature status flags.
7355     %HTMLAttrStatus,
7356 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7357 wakaba 1.49 color => FEATURE_M12N10_REC_DEPRECATED,
7358 wakaba 1.153 dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7359 wakaba 1.49 face => FEATURE_M12N10_REC_DEPRECATED,
7360 wakaba 1.153 id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7361     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
7362 wakaba 1.49 size => FEATURE_M12N10_REC_DEPRECATED,
7363 wakaba 1.153 style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
7364     title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7365 wakaba 1.49 }),
7366 wakaba 1.78 ## NOTE: When the |font| element was defined in the HTML5 specification,
7367     ## it was allowed only in a document with the WYSIWYG signature. The
7368     ## checker does not check whether the signature is present, since the
7369     ## signature has been dropped, too, and has never been implemented. (In
7370     ## addition, for any |font| element an "element not defined" error is raised
7371     ## anyway, so we don't have to raise an additional error.)
7372 wakaba 1.1 };
7373 wakaba 1.49
7374 wakaba 1.64 $Element->{$HTML_NS}->{basefont} = { ## Checker definition for the |basefont| element in the HTML namespace.
7375     %HTMLEmptyChecker, ## Start from the empty-element checker; keys listed below replace same-named entries.
7376     status => FEATURE_M12N10_REC_DEPRECATED,
7377     check_attrs => $GetHTMLAttrsChecker->({
7378     ## TODO: Add value checkers for the |color|, |face|, and |size| attributes.
7379     }, { ## Second hash: attribute name => feature status flags.
7380     %HTMLAttrStatus,
7381     color => FEATURE_M12N10_REC_DEPRECATED,
7382     face => FEATURE_M12N10_REC_DEPRECATED,
7383 wakaba 1.153 #id => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
7384     id => FEATURE_HTML5_WD | FEATURE_M12N10_REC, ## The line above is a disabled alternative status for |id|; only this entry is in effect.
7385 wakaba 1.64 size => FEATURE_M12N10_REC_DEPRECATED,
7386     }),
7387     };
7388    
7389 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
7390     ## class title id cols rows onload onunload style(x10)
7391     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7392     ## noframes Common, lang(xhtml10)
7393    
7394 wakaba 1.100 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7395 wakaba 1.56
7396 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7397     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7398     ## xmp, listing: sdapref [HTML 2.0]
7399    
7400 wakaba 1.56 =pod
7401    
7402 wakaba 1.61 HTML 2.0 nextid @n
7403    
7404     RFC 2659: CERTS CRYPTOPTS
7405    
7406     ISO-HTML: pre-html, divN
7407 wakaba 1.82
7408     XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7409     di (Common), nl (Common), handler (Common, type), standby (Common),
7410     summary (Common)
7411    
7412 wakaba 1.97 Access & XHTML2: access (LC)
7413 wakaba 1.82
7414     XML Events & XForms (for XHTML2 support; very, very low priority)
7415 wakaba 1.61
7416 wakaba 1.56 =cut
7417 wakaba 1.61
7418     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7419     ## We added them only to |a|. |link| and |form| might also allow them
7420     ## in theory.
7421 wakaba 1.1
7422     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1; ## Record in the shared namespace table that the HTML namespace definitions have been loaded.
7423    
7424     1; ## A true value as the last statement, as |require| expects of a loaded file.

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24