/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.184 - (hide annotations) (download)
Sun Jul 26 09:32:37 2009 UTC (16 years ago) by wakaba
Branch: MAIN
Changes since 1.183: +4 -0 lines
++ whatpm/t/dom-conformance/ChangeLog	26 Jul 2009 09:32:24 -0000
	* html-global-1.dat: Added test cases for spellcheck="" global
	attribute (cf. HTML5 revision 2801).

2009-07-26  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	26 Jul 2009 09:31:50 -0000
	* HTML.pm: Added spellcheck="" attribute (HTML5 revision 2801).

2009-07-26  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.117 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6    
7 wakaba 1.1 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8    
9 wakaba 1.174 ## --- Feature Status ---
10    
sub FEATURE_HTML5_COMPLETE () {
  ## HTML5 feature annotated with the "implemented" status; allowed.
  Whatpm::ContentChecker::FEATURE_ALLOWED () |
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
}
sub FEATURE_HTML5_CR () {
  ## HTML5 feature annotated with the "awaiting implementation
  ## feedback" status; allowed.
  Whatpm::ContentChecker::FEATURE_ALLOWED () |
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
sub FEATURE_HTML5_LC () {
  ## HTML5 feature annotated with the "last call for comments" status;
  ## allowed.
  Whatpm::ContentChecker::FEATURE_ALLOWED () |
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}
sub FEATURE_HTML5_AT_RISK () {
  ## HTML5 feature annotated as "being considered for removal"; it is
  ## still allowed and is reported with the working draft status.
  Whatpm::ContentChecker::FEATURE_ALLOWED () |
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}
sub FEATURE_HTML5_WD () {
  ## HTML5 feature annotated with the "working draft" status; allowed.
  Whatpm::ContentChecker::FEATURE_ALLOWED () |
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}
sub FEATURE_HTML5_FD () {
  ## HTML5 feature annotated with the "first draft" status; allowed and
  ## reported with the working draft status.
  Whatpm::ContentChecker::FEATURE_ALLOWED () |
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}
sub FEATURE_HTML5_DEFAULT () {
  ## HTML5 feature carrying no status annotation; allowed and reported
  ## with the working draft status.
  Whatpm::ContentChecker::FEATURE_ALLOWED () |
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}
sub FEATURE_HTML5_DROPPED () {
  ## Feature that was in HTML5 at some stage before the last call for
  ## comments and was then dropped.  No "allowed" flag is set.
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}
sub FEATURE_HTML5_LC_DROPPED () {
  ## Feature that reached the HTML5 last call for comments status and
  ## was then dropped.  No "allowed" flag is set.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}
57 wakaba 1.154
sub FEATURE_WF2X () {
  ## Feature defined by WF2 (deprecated or not) that was later merged
  ## into the HTML5 specification.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}
sub FEATURE_WF2 () {
  ## Feature introduced or modified by WF2 that was never merged into
  ## HTML5.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}
sub FEATURE_WF2_INFORMATIVE () {
  ## Feature only mentioned in the informative appendix A of WF2 and
  ## never merged into HTML5.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}
73 wakaba 1.49
sub FEATURE_RDFA_REC () {
  ## RDFa feature; reported with the Recommendation status.
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
}
sub FEATURE_RDFA_LC_DROPPED () {
  ## Feature defined in an RDFa last call working draft that was
  ## subsequently dropped.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}
82 wakaba 1.58
83     ## NOTE: XHTML Role LCWD has almost no information on how the |role|
84     ## attribute can be used; the only requirement in that regard is:
85     ## "the attribute MUST be referenced using its namespace-qualified form" (and
86     ## this is a host language conformance requirement!).
sub FEATURE_ROLE_LC () {
  ## XHTML Role feature; reported with the last call status.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}
90    
sub FEATURE_XHTML2_ED () {
  ## Feature of the XHTML 2.0 Editor's Draft, the one whose namespace
  ## URI is "http://www.w3.org/1999/xhtml".
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}
96 wakaba 1.58
sub FEATURE_XHTMLBASIC11_CR () {
  ## Feature new in the XHTML Basic 1.1 Recommendation (i.e. not in
  ## XHTML M12N).  The constant keeps its historical "_CR" name but
  ## yields the Recommendation status.
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
}
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  ## Feature that is new in the XHTML Basic 1.1 Recommendation but
  ## already deprecated there.
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO () |
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
}
108    
sub FEATURE_RUBY_REC () {
  ## Ruby annotation feature.
  ## NOTE(review): named "_REC" but yields FEATURE_STATUS_CR - confirm
  ## that this is intended.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
112    
sub FEATURE_M12N11_LC () {
  ## Feature new in the XHTML M12N 1.1 Recommendation (not in 1.0).
  ## The constant keeps its historical "_LC" name but yields the
  ## Recommendation status.
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
}
117    
118 wakaba 1.49 ## NOTE: M12N10 status is based on its abstract module definition.
119     ## It contains a number of problems. (However, again, it's a REC!)
sub FEATURE_M12N10_REC () {
  ## Feature of the XHTML M12N 1.0 Recommendation (which, despite its
  ## problems, did pass the CR phase).
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
}
sub FEATURE_M12N10_REC_DEPRECATED () {
  ## Deprecated feature of the XHTML M12N 1.0 Recommendation.
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO () |
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
}
128 wakaba 1.49
129     ## NOTE: XHTML10 status is based on its transitional and frameset DTDs
130     ## (second edition). Only missing attributes from M12N10 abstract
131     ## definition are added.
sub FEATURE_XHTML10_REC () {
  ## Feature found only in the XHTML 1.0 transitional/frameset DTDs.
  ## NOTE(review): named "_REC" but yields FEATURE_STATUS_CR - confirm
  ## that this is intended.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
135    
136 wakaba 1.61 ## NOTE: Diff from HTML4.
sub FEATURE_ISOHTML_PREPARATION () {
  ## Feature described by ISO-HTML informative documentation.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
140 wakaba 1.58
141 wakaba 1.49 ## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
142     ## 4.01). Only missing attributes from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  ## Feature reserved by HTML 4.01; reported with the working draft
  ## status.
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}
146    
147     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
148     ## rather than presentational attributes (deprecated or not deprecated).
149 wakaba 1.48
150 wakaba 1.61 ## NOTE: Diff from HTML4.
sub FEATURE_HTML32_REC_OBSOLETE () {
  ## Obsolete HTML 3.2 feature.  The deprecation is a lowercase
  ## normative "should".
  Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD () |
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
156    
sub FEATURE_RFC2659 () {
  ## Feature defined by RFC 2659, an Experimental RFC.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
160    
161     ## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
sub FEATURE_HTML2X_RFC () {
  ## HTML 2.x feature from an obsolete Proposed Standard RFC.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
165    
166     ## NOTE: Diff from HTML 2.0.
sub FEATURE_RFC1942 () {
  ## Feature defined by RFC 1942, an obsolete Experimental RFC.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
170    
171     ## NOTE: Diff from HTML 3.2.
sub FEATURE_HTML20_RFC () {
  ## HTML 2.0 feature from an obsolete Proposed Standard RFC.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
175 wakaba 1.58
176 wakaba 1.174 ## --- Content Model ---
177    
178 wakaba 1.29 ## December 2007 HTML5 Classification
179    
## Metadata content: namespace URI => {local name => 1}.
my $HTMLMetadataContent = {
  $HTML_NS => {
    map { $_ => 1 } (
      qw(title base link style script noscript),
      qw(event-source eventsource),
      qw(command datatemplate),
      ## NOTE: Outside of |head|, a |meta| element that has no |name|
      ## is not allowed as metadata content.
      qw(meta),
    ),
  },
  ## NOTE: The HTML5 spec mentions RDF.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
193    
## Flow content: namespace URI => {local name => 1}.
my $HTMLFlowContent = {
  $HTML_NS => {
    map { $_ => 1 } (
      qw(section nav article blockquote aside),
      qw(h1 h2 h3 h4 h5 h6 header),
      qw(footer address p hr dialog pre),
      qw(ol ul dl figure map table),
      qw(form fieldset),
      ## ISSUE: |details| and |datagrid| are "Flow element" in the spec.
      qw(details datagrid),
      qw(datatemplate),
      ## ISSUE: |div| has no category in the spec.
      qw(div),
      ## NOTE: |style| is allowed only when its |scoped| attribute is
      ## specified, and it has to precede any other element and any
      ## non-inter-element-whitespace text node.
      qw(style),

      qw(br q cite em strong small mark),
      qw(dfn abbr time progress meter code),
      qw(var samp kbd sub sup span i),
      qw(b bdo ruby),
      qw(script noscript event-source eventsource),
      qw(command bb),
      qw(input button label select datalist),
      qw(textarea output),
      ## ISSUE: |datagrid| (listed above) is also an "Interactive
      ## element" in the spec.
      ## NOTE: |area| is allowed only as a descendant of |map|.
      qw(area),

      ## NOTE: Transparent.
      qw(a ins del font),

      ## NOTE: Phrasing if there is a |menu| ancestor; flow otherwise.
      qw(menu),

      qw(img iframe embed object video audio),
      qw(canvas),
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
236    
## Sectioning content: namespace URI => {local name => 1}.
my $HTMLSectioningContent = {
  $HTML_NS => {
    map { $_ => 1 } (
      qw(section nav article aside),
      ## NOTE: |body| is only allowed in the |html| element.
      qw(body),
    ),
  },
};
244    
## Sectioning roots: namespace URI => {local name => 1}.
my $HTMLSectioningRoot = {
  $HTML_NS => {map { $_ => 1 } qw(blockquote datagrid figure td)},
};
250    
## Heading content: namespace URI => {local name => 1}.
my $HTMLHeadingContent = {
  $HTML_NS => {map { $_ => 1 } qw(h1 h2 h3 h4 h5 h6 header)},
};
256    
## Phrasing content: namespace URI => {local name => 1}.
## NOTE: All phrasing content is also flow content.
## NOTE: Non-inter-element-whitespace text nodes are phrasing content
## too, although they cannot be listed in this table.
my $HTMLPhrasingContent = {
  $HTML_NS => {
    map { $_ => 1 } (
      qw(br q cite em strong small mark),
      qw(dfn abbr time progress meter code),
      qw(var samp kbd sub sup span i),
      qw(b bdo ruby),
      qw(script noscript event-source eventsource),
      qw(command bb),
      qw(input button label select datalist),
      qw(textarea output),
      ## ISSUE: |datagrid| is an "Interactive element" in the spec.
      qw(datagrid),
      ## NOTE: |area| is allowed only as a descendant of |map|.
      qw(area),

      ## NOTE: Transparent.
      qw(a ins del font),

      ## NOTE: Phrasing if there is a |menu| ancestor; flow otherwise.
      qw(menu),

      qw(img iframe embed object video audio),
      qw(canvas),
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
288    
289 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
290 wakaba 1.29
## Interactive content: namespace URI => {local name => 1}.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    map { $_ => 1 } (
      qw(a),
      qw(label input button select textarea),
      qw(details datagrid bb),

      ## NOTE: Only when the "controls" attribute is specified.
      qw(video audio),

      ## NOTE: Only when the "type=toolbar" attribute is specified.
      qw(menu),
    ),
  },
};
304    
305 wakaba 1.139 ## NOTE: Labelable form-associated element.
## Labelable form-associated elements: namespace URI => {local name => 1}.
my $LabelableFAE = {
  $HTML_NS => {map { $_ => 1 } qw(input button select textarea)},
};
311    
312 wakaba 1.130 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
313    
314 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
315     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
316    
317     ## -- Common attribute syntax checkers
318    
319 wakaba 1.1 our $AttrChecker;
320 wakaba 1.82 our $AttrStatus;
321 wakaba 1.1
## Returns a checker for an enumerated attribute.  The argument maps a
## lowercased keyword to a positive value when the keyword is
## conforming and to another true value (e.g. -1) when it is known but
## non-conforming.  Any other attribute value is reported as invalid.
my $GetHTMLEnumeratedAttrChecker = sub {
  my ($state_of) = @_; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    my $keyword = lc $attr->value; ## TODO: ASCII case insensitivity?
    my $state = $state_of->{$keyword};
    return if $state > 0;

    my $type = $state ? 'enumerated:non-conforming' : 'enumerated:invalid';
    $self->{onerror}->(node => $attr, type => $type,
                       level => $self->{level}->{must});
  };
}; # $GetHTMLEnumeratedAttrChecker
338    
## Returns a checker for a boolean attribute whose local name is the
## argument: the lowercased value has to be either the empty string or
## that local name.
my $GetHTMLBooleanAttrChecker = sub {
  my ($attr_name) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $value = lc $attr->value; ## TODO: case
    return if $value eq '' or $value eq $attr_name;

    $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                       level => $self->{level}->{must});
  };
}; # $GetHTMLBooleanAttrChecker
350    
351 wakaba 1.8 ## Unordered set of space-separated tokens
## Returns a checker for an unordered set of unique space-separated
## tokens.  When the argument is defined, it is a hash reference whose
## true entries name the only tokens that are allowed; when it is
## undefined, any token is accepted.  A repeated token is reported once
## per repetition.
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($allowed_words) = @_;
  return sub {
    my ($self, $attr) = @_;
    my %seen;
    for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      next unless length $token;

      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
        next;
      }
      $seen{$token} = 1;

      next if not defined $allowed_words;
      next if $allowed_words->{$token};
      $self->{onerror}->(node => $attr, type => 'word not allowed',
                         value => $token,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
377 wakaba 1.8
378 wakaba 1.132 ## |rel| attribute (set of space separated tokens,
379 wakaba 1.1 ## whose allowed values are defined by the section on link types)
## Checker for the |rel| attribute, a set of space-separated tokens
## whose allowed values are defined by the section on link types.
##
## Arguments: |$a_or_area| is used as the index into the |effect| array
## of a link type definition and is also tested as a boolean (true:
## the |href| is always marked as a hyperlink); |$todo| receives the
## |has_hyperlink_link_type| flag; |$element_state| receives |uri_info|
## type flags for |href| and the set of tokens as |link_rel|.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Collect the tokens.  A repeated token is an error, except for
  ## "up", which may be repeated.
  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  my %word;
  for my $token (grep {length $_}
                 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
    }
  }

  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  require Whatpm::_LinkTypeList;
  our $LinkType;

  my ($is_hyperlink, $is_resource);
  for my $token (keys %word) {
    my $def = $LinkType->{$token};
    unless (defined $def) {
      $self->{onerror}->(node => $attr,
                         type => 'unknown link type',
                         value => $token,
                         level => $self->{level}->{uncertain});
      next;
    }

    ## Effect of the link type in the current context, if any.
    my $effect = $def->{effect}->[$a_or_area];

    my $status = $def->{status};
    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr,
                           type => 'link type:proposed',
                           value => $token,
                           level => $self->{level}->{should});
      }
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context',
                           value => $token,
                           level => $self->{level}->{must});
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming',
                         value => $token,
                         level => $self->{level}->{must});
    }

    ## A "unique" link type may be used only once per checker run.
    if ($def->{unique}) {
      if ($self->{has_link_type}->{$token}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate',
                           value => $token,
                           level => $self->{level}->{must});
      } else {
        $self->{has_link_type}->{$token} = 1;
      }
    }

    ## "alternate" is decided after the loop, since its effect depends
    ## on whether "stylesheet" is also specified.
    if (defined $effect and $token ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};

  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback
  ## link, which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than
  ## predefined 4.

  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;

  my $href_type = $element_state->{uri_info}->{href}->{type} ||= {};
  $href_type->{hyperlink} = 1 if $is_hyperlink or $a_or_area;
  $href_type->{resource} = 1 if $is_resource and not $a_or_area;
  delete $element_state->{uri_info}->{href}->{type}
      unless keys %$href_type;
  delete $element_state->{uri_info}->{href}
      unless keys %{$element_state->{uri_info}->{href}};
  delete $element_state->{uri_info}
      unless keys %{$element_state->{uri_info}};

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
493 wakaba 1.20
494     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
495 wakaba 1.1
496     ## URI (or IRI)
## Checker for an attribute whose value is a URI (or IRI) reference.
##
## The value is validated by |Whatpm::URIChecker|; errors it reports
## are forwarded to |$self->{onerror}| with the attribute as |node|.
## The attribute is then recorded in
## |$element_state->{uri_info}->{<attribute name>}->{node}|, and that
## same |uri_info| entry is appended to the list kept for the value in
## |$self->{return}->{uri}|.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an
  ## absolute reference with optional fragment identifier.)
  my $value = $attr->value;

  ## The error level table has to be the third argument of
  ## |check_iri_reference|, exactly as in $HTMLSpaceURIsAttrChecker.
  ## (It used to be written after the closing parenthesis of the call,
  ## where it was silently discarded.)
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
512    
513     ## A space separated list of one or more URIs (or IRIs)
## Checker for an attribute whose value is a space-separated list of
## one or more URIs (or IRIs).  Every item is validated by
## |Whatpm::URIChecker| and recorded in |$self->{return}->{uri}| with a
## type that depends on the attribute name.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  my %uri_type_for = (
    ping => 'action',
    profile => 'namespace',
    archive => 'resource',
  );
  my $type = $uri_type_for{$attr->name};

  my $index = 0;
  for my $uri (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    ## Errors carry the offending item and its position in the list.
    my $report = sub {
      $self->{onerror}->(value => $uri, @_, node => $attr, index => $index);
    };
    Whatpm::URIChecker->check_iri_reference ($uri, $report, $self->{level});

    ## TODO: absolute
    my $entry = {node => $attr, type => {$type => 1}};
    push @{$self->{return}->{uri}->{$uri} ||= []}, $entry;

    $index++;
  }

  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Are leading or trailing white spaces conforming?
  ## ISSUE: Is a sequence of white space characters conforming?
  ## ISSUE: Is a zero-length string conforming? (It does contain a
  ## relative reference, i.e. same as base URI.)
  ## ISSUE: What is "space"?
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
541    
## Unanchored pattern for an e-mail address of the form
## dot-atom "@" dot-atom.  Callers have to add their own anchors.
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/;
  qr/$dot_atom\@$dot_atom/;
};
548    
549 wakaba 1.168 ## Valid global date and time.
## Returns a checker for a date and/or time attribute.  |$type| selects
## the |Message::Date| parser method (|parse_<type>|).
##
## The checker stores the parser's result in
## |$element_state->{date_value}->{<attribute name>}|.  When the parser
## returns a false value, the stored value is the empty string if an
## error has been reported, or |undef| if the only complaint was
## 'date value not supported' (which is not reported at all).
my $GetDateTimeAttrChecker = sub ($) {
  my ($type) = @_;
  my $parse_method = 'parse_' . $type;

  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    require Message::Date;
    my $parser = Message::Date->new;
    $parser->{level} = $self->{level};

    my $fallback; # remains |undef| as long as nothing is reported
    $parser->{onerror} = sub {
      my %opt = @_;
      return if $opt{type} eq 'date value not supported';

      $self->{onerror}->(%opt, node => $attr);
      $fallback = '';
    };

    my $parsed = $parser->$parse_method ($attr->value);
    $element_state->{date_value}->{$attr->name} = $parsed || $fallback;
  };
}; # $GetDateTimeAttrChecker
573 wakaba 1.1
## Checker for an integer attribute: an optional "-" followed by one
## or more ASCII digits.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  return if $value =~ /\A-?[0-9]+\z/;

  $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                     level => $self->{level}->{must});
}; # $HTMLIntegerAttrChecker
582    
## Returns a checker for a non-negative integer attribute (one or more
## ASCII digits).  |$range_check| is called with the numeric value and
## has to return true if the value is within the allowed range.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my ($range_check) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;

    my $type;
    if ($value !~ /\A[0-9]+\z/) {
      $type = 'nninteger:syntax error';
    } elsif (not $range_check->($value + 0)) {
      $type = 'nninteger:out of range';
    }
    return unless defined $type;

    $self->{onerror}->(node => $attr, type => $type,
                       level => $self->{level}->{must});
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
600    
## Returns a checker for a floating point number attribute.
## |$range_check| is called with the numeric value and has to return
## true if the value is within the allowed range; in that case the
## number is stored in
## |$element_state->{number_value}->{<attribute name>}|.
## TODO: scientific notation
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my ($range_check) = @_;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;

    unless ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
            $value =~ /\A-?\.[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error',
                         level => $self->{level}->{must});
      return;
    }

    unless ($range_check->($value + 0)) {
      $self->{onerror}->(node => $attr, type => 'float:out of range',
                         level => $self->{level}->{must});
      return;
    }

    ## TODO: parse algorithm
    $element_state->{number_value}->{$attr->name} = $value + 0;
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
624    
## Checker for the |step| attribute: either a valid floating point
## number that is greater than zero, or an ASCII case-insensitive
## match for "any".
## TODO: scientific
my $StepAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;

  my $type;
  if ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
      $value =~ /\A-?\.[0-9]+\z/) {
    $type = 'float:out of range' unless $value > 0;
  } elsif ($value !~ /\A[Aa][Nn][Yy]\z/) {
    $type = 'float:syntax error';
  }
  return unless defined $type;

  $self->{onerror}->(node => $attr, type => $type,
                     level => $self->{level}->{must});
}; # $StepAttrChecker
647    
648 wakaba 1.86 ## HTML4 %Length;
## Checker for an HTML4 %Length; attribute: one or more ASCII digits,
## optionally followed by "%".
## NOTE: The HTML4 definition is too vague - it does not define the
## syntax of the percentage value at all (!).
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  return if $value =~ /\A[0-9]+%?\z/;

  $self->{onerror}->(node => $attr, type => 'length:syntax error',
                     level => $self->{level}->{must});
}; # $HTMLLengthAttrChecker
660    
## One or more characters allowed in a MIME |token|.
my $MIMEToken = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;

## A media type name or subtype name, 1 to 127 characters (RFC 4288).
my $TypeOrSubtype = qr/[A-Za-z0-9!#\$&.+^_-]{1,127}/; # RFC 4288

## "type/subtype" without parameters; captures the type and the
## subtype.  Unanchored, so callers have to add their own anchors.
my $IMTNoParameter = qr[($TypeOrSubtype)/($TypeOrSubtype)];
664    
665 wakaba 1.1 ## "A valid MIME type, optionally with parameters. [RFC 2046]"
666     ## ISSUE: RFC 2046 does not define syntax of media types.
667     ## ISSUE: The definition of "a valid MIME type" is unknown.
668     ## Syntactical correctness?
## Checker for "a valid MIME type, optionally with parameters".
##
## The value is split into type, subtype and name/value parameter
## pairs (quoted-string values are unescaped and unquoted), which are
## then handed to |Whatpm::IMTChecker|.  A value that does not match
## the syntax is reported as 'IMT:syntax error'.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;

  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
  ## ISSUE: RFC 2231 extension? Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;

  my ($type, $subtype, $param) = $value =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#;
  unless (defined $type) {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                       level => $self->{level}->{must});
    return;
  }

  ## Consume the parameters one by one from the front of |$param|.
  my @type = ($type, $subtype);
  while ($param =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
    my ($name, $token, $quoted) = ($1, $2, $3);
    if (defined $token) {
      push @type, $name => $token;
    } else {
      ## Resolve the quoted-pairs, then strip the enclosing quotes.
      $quoted =~ s/\\(.)/$1/gs;
      push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
    }
  }

  require Whatpm::IMTChecker;
  my $ic = Whatpm::IMTChecker->new;
  $ic->{level} = $self->{level};
  $ic->check_imt (sub {
    $self->{onerror}->(@_, node => $attr);
  }, @type);
}; # $HTMLIMTAttrChecker
701    
## Checker for a language tag attribute; the value is validated as an
## RFC 3066 language tag by |Whatpm::LangTag|, whose errors are
## forwarded with the attribute as |node|.
## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
## ISSUE: RFC 4646 (3066bis)?
## TODO: testdata
my $HTMLLanguageTagAttrChecker = sub {
  my ($self, $attr) = @_;

  my $report = sub {
    $self->{onerror}->(@_, node => $attr);
  };

  my $value = $attr->value;
  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag
      ($value, $report, $self->{level});
}; # $HTMLLanguageTagAttrChecker
715    
716     ## "A valid media query [MQ]"
## Checker for "a valid media query [MQ]".  The value is not examined;
## the attribute is always reported at the "uncertain" level.
## ISSUE: What is "a valid media query"?
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my $uncertain = $self->{level}->{uncertain};
  $self->{onerror}->(node => $attr, type => 'media query',
                     level => $uncertain);
}; # $HTMLMQAttrChecker
724    
## Checker for an event handler content attribute.  The value is not
## examined; the attribute is always reported at the "uncertain" level.
## TODO: MUST contain valid ECMAScript code matching the
## ECMAScript |FunctionBody| production. [ECMA262]
## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
## ISSUE: Automatic semicolon insertion does not apply?
## ISSUE: Other script languages?
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my $uncertain = $self->{level}->{uncertain};
  $self->{onerror}->(node => $attr, type => 'event handler',
                     level => $uncertain);
}; # $HTMLEventHandlerAttrChecker
736    
## Checker for the |form| attribute, whose value MUST be the ID of a
## |form| element.  Nothing is verified here; an
## ['form', ID, attribute node] entry is queued in |$self->{idref}|.
## ISSUE: <form id=""><input form=""> (empty ID)?
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;
  my $form_id = $attr->value;
  my $entry = ['form', $form_id, $attr];
  push @{$self->{idref}}, $entry;
}; # $HTMLFormAttrChecker
747    
## Checker for the |list| attribute, whose value MUST be the ID of a
## |datalist| element.  Nothing is verified here; an
## ['datalist', ID, attribute node] entry is queued in
## |$self->{idref}|.
## TODO: Warn violation to control-dependent restrictions. For
## example, |<input type=url maxlength=10 list=a> <datalist
## id=a><option value=nonurlandtoolong></datalist>| should be
## warned.
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;
  my @entry = ('datalist', $attr->value, $attr);
  push @{$self->{idref}}, \@entry;
}; # $ListAttrChecker
760    
## Checker for the |pattern| attribute.  The value is handed to
## |$self->{onsubdoc}| as a character-string subdocument of type
## 'text/x-regexp-js', with the attribute as its container node.
## ISSUE: "value must match the Pattern production of ECMA 262's
## grammar" - no additional constraints (e.g. {n,m} then n>=m).
## TODO: Warn if @value does not match @pattern.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;
  my %subdoc = (
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-regexp-js',
    is_char_string => 1,
  );
  $self->{onsubdoc}->(\%subdoc);
}; # $PatternAttrChecker
773    
## Checker for the |accept| attribute.
##
## The value is lowercased (ASCII only) and split on commas.  Each
## token is then checked:
##   - a repeated token is reported as 'duplicate token';
##   - |audio/*|, |video/*| and |image/*| are accepted as is;
##   - a token matching |$IMTNoParameter| is handed to
##     |Whatpm::IMTChecker|;
##   - anything else is reported as 'IMTnp:syntax error'.
my $AcceptAttrChecker = sub {
  my ($checker, $attr_node) = @_;

  (my $list = $attr_node->value) =~ tr/A-Z/a-z/; ## ASCII case-insensitive

  ## An empty value is treated as a single empty token.
  my @tokens = ('');
  @tokens = split /,/, $list, -1 if length $list;

  my %is_wildcard = map { $_ => 1 } 'audio/*', 'video/*', 'image/*';
  my %seen;
  TOKEN: for my $token (@tokens) {
    if ($seen{$token}++) {
      $checker->{onerror}->(node => $attr_node,
                            type => 'duplicate token',
                            value => $token,
                            level => $checker->{level}->{must});
      next TOKEN;
    }

    next TOKEN if $is_wildcard{$token};

    if ($token =~ m[\A$IMTNoParameter\z]) {
      ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
      ## define its own syntax citing RFC 4288.

      ## NOTE: Parameters not allowed.
      require Whatpm::IMTChecker;
      my $imt_checker = Whatpm::IMTChecker->new;
      $imt_checker->{level} = $checker->{level};
      $imt_checker->check_imt (sub {
        $checker->{onerror}->(@_, node => $attr_node);
      }, $1, $2);
    } else {
      $checker->{onerror}->(node => $attr_node,
                            type => 'IMTnp:syntax error', ## TODOC: type
                            value => $token,
                            level => $checker->{level}->{must});
    }
  } # TOKEN
}; # $AcceptAttrChecker
812    
## Checker for the |name| attribute of form controls.
##
## Reports 'empty control name' when the value has no characters.
my $FormControlNameAttrChecker = sub {
  my ($checker, $attr_node) = @_;

  ## NOTE: No uniqueness constraint.

  if (not length $attr_node->value) {
    my @report = (node => $attr_node,
                  type => 'empty control name', ## TODOC: type
                  level => $checker->{level}->{must});
    $checker->{onerror}->(@report);
  }
}; # $FormControlNameAttrChecker
824    
## Checker for the |autofocus| attribute.
##
## Runs the boolean attribute checker for 'autofocus' on the same
## arguments, then reports 'duplicate autofocus' if
## |$self->{has_autofocus}| was already set, and sets that flag.
my $AutofocusAttrChecker = sub {
  my ($checker, $attr_node) = @_;

  my $check_boolean = $GetHTMLBooleanAttrChecker->('autofocus');
  $check_boolean->(@_);

  my $seen_before = $checker->{has_autofocus};
  if ($seen_before) {
    $checker->{onerror}->(node => $attr_node,
                          type => 'duplicate autofocus', ## TODOC: type
                          level => $checker->{level}->{must});
  }
  $checker->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
837    
## Checker for the |usemap| attribute.
##
## A value starting with "#" is recorded, without that "#", as a
## [name, attribute node] entry in |$self->{usemap}|.  Any other
## value is reported as 'hashref:syntax error'.
my $HTMLUsemapAttrChecker = sub {
  my ($checker, $attr_node) = @_;

  ## MUST be a valid hash-name reference to a |map| element.
  my $map_name = $attr_node->value;
  my $is_hash_ref = ($map_name =~ s/^#//);

  if (not $is_hash_ref) {
    $checker->{onerror}->(node => $attr_node, type => 'hashref:syntax error',
                          level => $checker->{level}->{must});
  } else {
    ## NOTE: |usemap="#"| is conforming, though it identifies no |map|
    ## element according to the "rules for parsing a hash-name
    ## reference" algorithm.  The document is non-conforming anyway,
    ## since |<map name="">| (empty name) is non-conforming.
    push @{$checker->{usemap}}, [$map_name => $attr_node];
  }

  ## NOTE: Space characters in hash-name references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs that
  ## differ only in case should be reported.
}; # $HTMLUsemapAttrChecker
855    
## Valid browsing context name
##
## Reports 'window name:reserved' for a value starting with "_" and
## 'window name:empty' for an empty value; any other value passes.
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($checker, $attr_node) = @_;
  my $name = $attr_node->value;

  my @problem;
  if ($name =~ /^_/) {
    @problem = (type => 'window name:reserved',
                level => $checker->{level}->{must},
                value => $name);
  } elsif (not length $name) {
    @problem = (type => 'window name:empty',
                level => $checker->{level}->{must});
  }

  $checker->{onerror}->(node => $attr_node, @problem) if @problem;
}; # $HTMLBrowsingContextNameAttrChecker
871    
## Valid browsing context name or keyword
##
## Checker for |target|-like attributes.
##   - A value starting with "_" must be one of the keywords
##     |_blank|, |_self|, |_parent| or |_top|, compared ASCII
##     case-insensitively; otherwise 'window name:reserved' is
##     reported, with the ASCII-lowercased value.
##   - An empty value is reported as 'window name:empty'.
##   - Any other value is accepted.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    ## Only A-Z are folded.  |lc| would also map non-ASCII characters
    ## such as U+212A KELVIN SIGN to ASCII letters, so that
    ## |_blan\x{212A}| would wrongly be accepted as |_blank|.
    $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
    unless ({
      _blank => 1, _self => 1, _parent => 1, _top => 1,
    }->{$value}) {
      $self->{onerror}->(node => $attr,
                         type => 'window name:reserved',
                         level => $self->{level}->{must},
                         value => $value);
    }
  } elsif (length $value) {
    #
  } else {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  }
}; # $HTMLTargetAttrChecker
893    
## Checker for attributes whose value is a group of selectors.
##
## Parses the value with |Whatpm::CSS::SelectorsParser| after
## enabling the pseudo-classes and pseudo-elements listed below.
## Parser messages are forwarded to |onerror| with the attribute
## added as |node|.
my $HTMLSelectorsAttrChecker = sub {
  my ($checker, $attr_node) = @_;

  ## ISSUE: Namespace resolution?

  my $selectors = $attr_node->value;

  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  my @pseudo_elements = qw/
    after before first-letter first-line
  /;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;
  for my $name (@pseudo_classes) {
    $parser->{pseudo_class}->{$name} = 1;
  }
  for my $name (@pseudo_elements) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{level} = $checker->{level};
  $parser->{onerror} = sub {
    $checker->{onerror}->(@_, node => $attr_node);
  };
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
921    
## Checks one character encoding name.
##
## Arguments: the charset name, the checker object, the attribute
## node used as |node| in messages, and an optional flag requesting
## the ASCII-compatibility check.
##
## Returns a two-item list: the |$Message::Charset::Info::IANACharset|
## entry for the name (false if there is none) and the
## ASCII-lowercased name.
##
## NOTE: This code is used for |charset=""| attributes, |charset=|
## portion of the |content=""| attributes, and |accept-charset=""|
## attributes.
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($charset_value, $self, $attr, $ascii_compat) = @_;

  ## NOTE: Though the case-sensitivity of the |charset| attribute
  ## value is not explicitly spelled out in the HTML5 spec, the
  ## Character Set registry of IANA, which is referenced from the
  ## HTML5 spec, says that charset names are case-insensitive.
  $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive.

  require Message::Charset::Info;
  my $charset = $Message::Charset::Info::IANACharset->{$charset_value};

  ## Every message from this checker has the same shape; only the
  ## type and the level name vary.
  my $report = sub {
    my ($type, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $charset_value,
                       level => $self->{level}->{$level_name});
  };

  ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
  ## Syntactically valid and registered?  What about x-charset names?
  unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
              ($charset_value)) {
    $report->('charset:syntax error', 'must');
  }

  if ($charset) {
    ## ISSUE: What is "the preferred name for that encoding" (for a charset
    ## with no "preferred MIME name" label)?
    my $charset_status = $charset->{iana_names}->{$charset_value} || 0;

    my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
    unless (($charset_status & $preferred) == $preferred) {
      $report->('charset:not preferred', 'must');
    }

    my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();
    unless (($charset_status & $registered) == $registered) {
      my $type = $charset_value =~ /^x-/
          ? 'charset:private' : 'charset:not registered';
      $report->($type, 'good');
    }

    if ($ascii_compat) {
      unless ($charset->{category} &
                  Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ()) {
        $report->('charset:not ascii compat', 'must');
      }
    }
  } else {
    ## TODO: non-preferred-name error for these cases.

    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    my $type = $charset_value =~ /^x-/
        ? 'charset:private' : 'charset:not registered';
    $report->($type, 'good');
  }

  return ($charset, $charset_value);
}; # $HTMLCharsetChecker
1010    
## NOTE: "An ordered set of space-separated tokens" where "each token
## MUST be the preferred name of an ASCII-compatible character
## encoding".
##
## Splits the value on runs of space characters and passes every
## non-empty token to |$HTMLCharsetChecker| with the
## ASCII-compatibility flag set.
my $HTMLCharsetsAttrChecker = sub {
  my ($checker, $attr_node) = @_;

  ## ISSUE: "ordered set of space-separated tokens" is not defined.

  ## XXX
  ## ISSUE: Uniqueness is not enforced.

  my $list = $attr_node->value;
  for my $name (split /[\x09\x0A\x0C\x0D\x20]+/, $list) {
    next unless length $name;
    $HTMLCharsetChecker->($name, $checker, $attr_node, 1);
  }

  ## ISSUE: Shift_JIS is ASCII-compatible?  What about ISO-2022-JP?
}; # $HTMLCharsetsAttrChecker
1030    
## Checker for HTML4 color attributes.
##
## Accepts "#" followed by one or more hexadecimal digits, or one of
## the sixteen color names in the pattern below, case-insensitively.
## Anything else is reported as 'color:syntax error' at the
## |html4_fact| level.
my $HTMLColorAttrChecker = sub {
  my ($checker, $attr_node) = @_;

  ## NOTE: HTML4 "color" or |%Color;|

  my $color = $attr_node->value;

  unless ($color =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i) {
    my @report = (node => $attr_node, type => 'color:syntax error',
                  level => $checker->{level}->{html4_fact});
    $checker->{onerror}->(@report);
  }

  ## TODO: HTML4 has some guideline on usage of color.
}; # $HTMLColorAttrChecker
1045    
## Checker shared by the |ref| and |template| attributes.
##
## Steps:
##   1. Run the generic URI attribute checker on the same arguments.
##   2. For |ref|, report 'attribute not allowed' unless the owner
##      element also has a |template| attribute in no namespace.
##   3. Resolve the value against the document URI.  If it points
##      into the current document:
##        - with a fragment identifier, record [fragment, attribute
##          node] in |$self->{ref}| or |$self->{template}|;
##        - without one, for |template| only, report 'template:not
##          template' unless the document element is an HTML
##          |datatemplate| element.
##      External documents are not checked yet.
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($checker, $attr_node) = @_;
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $attr_node->name;

  if ($attr_name eq 'ref' and
      not $attr_node->owner_element->has_attribute_ns (undef, 'template')) {
    $checker->{onerror}->(node => $attr_node, type => 'attribute not allowed',
                          level => $checker->{level}->{must});
  }

  require Message::URL;
  my $doc = $attr_node->owner_document;
  my $doc_uri = $doc->document_uri;
  my $uri = Message::URL->new_abs ($attr_node->value, $doc_uri);
  my $no_frag_uri = $uri->clone;
  $no_frag_uri->uri_fragment (undef);

  my $same_document
      = ((defined $doc_uri and $doc_uri eq $no_frag_uri) or
         (not defined $doc_uri and $no_frag_uri eq ''));

  if ($same_document) {
    my $fragid = $uri->uri_fragment;
    if (defined $fragid) {
      push @{$checker->{$attr_name}}, [$fragid => $attr_node];
    } elsif ($attr_name eq 'template') {
      ## The whole document is referenced, so its root element has to
      ## be the |datatemplate|.
      my $root_is_datatemplate = 0;
      my $docel = $doc->document_element;
      if ($docel) {
        my $nsuri = $docel->namespace_uri;
        $root_is_datatemplate = 1
            if defined $nsuri and $nsuri eq $HTML_NS and
               $docel->manakai_local_name eq 'datatemplate';
      }

      unless ($root_is_datatemplate) {
        $checker->{onerror}->(node => $attr_node,
                              type => 'template:not template',
                              level => $checker->{level}->{must});
      }
    }
  } else {
    ## TODO: An external document is referenced.
    ## The document MUST be an HTML or XML document.
    ## If there is a fragment identifier, it MUST point to a part of
    ## the document.
    ## If the attribute is |template|, the pointed part MUST be a
    ## |datatemplate| element.
    ## If no fragment identifier is specified, the root element MUST be
    ## a |datatemplate| element when the attribute is |template|.
  }
}; # $HTMLRefOrTemplateAttrChecker
1098    
## Checker for the |repeat-min|, |repeat-max| and |repeat-start|
## attributes.
##
## An attribute that is in a namespace is reported as 'attribute not
## allowed' when its owner element is in the HTML namespace.  The
## value is then checked by the non-negative integer checker, with a
## range callback that accepts every value.
my $HTMLRepeatIndexAttrChecker = sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## This test used |or|, which made it true for every owner
    ## element in any namespace and compared an undefined value for
    ## elements in no namespace.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1113    
## Checker for the |placeholder| attribute.
##
## Reports 'newline in value' when the value contains U+000D or
## U+000A.
my $PlaceholderAttrChecker = sub {
  my ($checker, $attr_node) = @_;

  my $has_newline = $attr_node->value =~ /[\x0D\x0A]/;
  if ($has_newline) {
    my @report = (node => $attr_node,
                  type => 'newline in value', ## TODOC: type
                  level => $checker->{level}->{must});
    $checker->{onerror}->(@report);
  }
}; # $PlaceholderAttrChecker
1122    
## Checkers for the global attributes, keyed by attribute local name.
##
## Each checker is called as |$checker->($self, $attr, $item,
## $element_state)|.  Problems are reported through
## |$self->{onerror}|; some checkers also record data in |$self| (IDs,
## ID references, class names, namespace URLs).
my $HTMLAttrChecker = {
  accesskey => sub {
    my ($self, $attr) = @_;

    ## "Ordered set of unique space-separated tokens"

    my %keys;
    my @keys = grep {length} split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

    for my $key (@keys) {
      unless ($keys{$key}) {
        $keys{$key} = 1;
        ## Each token has to be exactly one character long.
        if (length $key != 1) {
          $self->{onerror}->(node => $attr, type => 'char:syntax error',
                             value => $key,
                             level => $self->{level}->{must});
        }
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $key,
                           level => $self->{level}->{must});
      }
    }
  }, # accesskey

  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]
  id => sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    if (length $value > 0) {
      if ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID',
                           level => $self->{level}->{must});
        push @{$self->{id}->{$value}}, $attr;
      } else {
        ## First use of this ID: remember the attribute and the ID
        ## type given by the element state.
        $self->{id}->{$value} = [$attr];
        $self->{id_type}->{$value} = $element_state->{id_type} || '';
      }
      if ($value =~ /[\x09\x0A\x0C\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID',
                           level => $self->{level}->{must});
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value',
                         level => $self->{level}->{must});
    }
  },
  title => sub {}, ## NOTE: No conformance criteria
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value eq '') {
      #
    } else {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        $self->{onerror}->(@_, node => $attr);
      }, $self->{level});
    }
    ## ISSUE: RFC 4646 (3066bis)?

    ## TODO: test data

    ## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
    ## non-conforming.  Such errors are detected by the checkers of
    ## |{}xml:lang| and |{xml}:lang| attributes.
  },
  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
  class => sub {
    my ($self, $attr) = @_;

    ## NOTE: "Unordered set of unique space-separated tokens".

    my %word;
    for my $word (grep {length $_}
                  split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      unless ($word{$word}) {
        $word{$word} = 1;
        push @{$self->{return}->{class}->{$word}||=[]}, $attr;
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $word,
                           level => $self->{level}->{must});
      }
    }
  },
  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),
  contextmenu => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    push @{$self->{idref}}, ['menu', $value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },
  hidden => $GetHTMLBooleanAttrChecker->('hidden'),
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ref => $HTMLRefOrTemplateAttrChecker,
  registrationmark => sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## NOTE: Any value is conforming.

    if ($self->{flag}->{in_rule}) {
      my $el = $attr->owner_element;
      my $ln = $el->manakai_local_name;
      if ($ln eq 'nest' or
          ($ln eq 'rule' and not $element_state->{in_rule_original})) {
        my $nsuri = $el->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    } else {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  },
  repeat => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## A namespaced attribute is disallowed on elements in the HTML
      ## namespace.  This test used |or|, which made it true for every
      ## owner element in any namespace and compared an undefined
      ## value for elements in no namespace.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    my $value = $attr->value;
    if ($value eq 'template') {
      #
    } elsif ($value =~ /\A-?[0-9]+\z/) {
      #
    } else {
      $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "Repetition templates may occur anywhere."  Does that mean
    ## that the attribute MAY be specified to any element, or that the
    ## element with that attribute (i.e. a repetition template) can be
    ## inserted anywhere in a document tree?
  },
  'repeat-min' => $HTMLRepeatIndexAttrChecker,
  'repeat-max' => $HTMLRepeatIndexAttrChecker,
  'repeat-start' => $HTMLRepeatIndexAttrChecker,
  'repeat-template' => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## Same rule as for |repeat|; this test also used |or| before.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## ISSUE: This attribute has no conformance requirement.
    ## ISSUE: Repetition blocks MAY have this attribute.  Then, is the
    ## attribute allowed on an element that is not a repetition block?
  },
  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
  spellcheck => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),
  style => sub {
    my ($self, $attr) = @_;

    $self->{onsubdoc}->({s => $attr->value,
                         container_node => $attr,
                         media_type => 'text/x-css-inline',
                         is_char_string => 1});

    ## NOTE: "... MUST still be comprehensible and usable if those
    ## attributes were removed" is a semantic requirement, it cannot
    ## be tested.
  },
  tabindex => $HTMLIntegerAttrChecker,
  template => $HTMLRefOrTemplateAttrChecker,
  'xml:lang' => sub {
    my ($self, $attr) = @_;

    if ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(type => 'in HTML:xml:lang',
                         level => $self->{level}->{info},
                         node => $attr);
      ## NOTE: This is not an error, but the attribute will be ignored.
    } else {
      $self->{onerror}->(type => 'in XML:xml:lang',
                         level => $self->{level}->{html5_no_may},
                         node => $attr);
      ## TODO: We need to add test for this error.
    }

    ## The attribute is only allowed next to a |lang| attribute with
    ## the same value (compared ASCII case-insensitively).
    my $lang_attr = $attr->owner_element->get_attribute_node_ns
        (undef, 'lang');
    if ($lang_attr) {
      my $lang_attr_value = $lang_attr->value;
      $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      if ($lang_attr_value ne $value) {
        $self->{onerror}->(type => 'xml:lang ne lang',
                           level => $self->{level}->{must},
                           node => $attr);
      }
    } else {
      $self->{onerror}->(type => 'xml:lang not allowed',
                         level => $self->{level}->{must},
                         node => $attr);
      ## TODO: We need to add test for <x {xml}:lang {}xml:lang>.
    }
  },
  xmlns => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                         level => $self->{level}->{must});
      ## TODO: Should be new "bad namespace" error?
    }
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                         level => $self->{level}->{must});
      ## TODO: Test
    }

    ## TODO: Should be resolved?
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {namespace => 1}};
  },
};
1363    
## ISSUE: Shouldn't the same-origin policy be applied to the datatemplate feature?
1365    
## Feature status of the global attributes, keyed by attribute local
## name.  |role| has the status 0.
my %HTMLAttrStatus = (
  accesskey         => FEATURE_HTML5_FD,
  class             => FEATURE_HTML5_WD,
  contenteditable   => FEATURE_HTML5_DEFAULT,
  contextmenu       => FEATURE_HTML5_WD,
  dir               => FEATURE_HTML5_WD,
  draggable         => FEATURE_HTML5_LC,
  hidden            => FEATURE_HTML5_DEFAULT,
  id                => FEATURE_HTML5_WD,
  irrelevant        => FEATURE_HTML5_DROPPED,
  lang              => FEATURE_HTML5_WD,
  ref               => FEATURE_HTML5_AT_RISK,
  registrationmark  => FEATURE_HTML5_AT_RISK,
  repeat            => FEATURE_WF2,
  'repeat-max'      => FEATURE_WF2,
  'repeat-min'      => FEATURE_WF2,
  'repeat-start'    => FEATURE_WF2,
  'repeat-template' => FEATURE_WF2,
  role              => 0,
  spellcheck        => FEATURE_HTML5_WD,
  style             => FEATURE_HTML5_WD,
  tabindex          => FEATURE_HTML5_DEFAULT,
  template          => FEATURE_HTML5_AT_RISK,
  title             => FEATURE_HTML5_WD,
  xmlns             => FEATURE_HTML5_WD,
);
1392    
## Feature status of the common attributes that combine HTML5, XHTML
## Modularization 1.0 and RDFa status flags, keyed by attribute local
## name.
my %HTMLM12NCommonAttrStatus = (
  about       => FEATURE_RDFA_REC,
  class       => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  content     => FEATURE_RDFA_REC,
  datatype    => FEATURE_RDFA_REC,
  dir         => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  href        => FEATURE_RDFA_REC,
  id          => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  instanceof  => FEATURE_RDFA_LC_DROPPED,
  onclick     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  ondblclick  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmouseup   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmouseout  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onkeypress  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onkeydown   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onkeyup     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  property    => FEATURE_RDFA_REC,
  rel         => FEATURE_RDFA_REC,
  resource    => FEATURE_RDFA_REC,
  rev         => FEATURE_RDFA_REC,
  #style      => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #              FEATURE_M12N10_REC,
  style       => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
                 FEATURE_M12N10_REC,
  title       => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  typeof      => FEATURE_RDFA_REC,
);
1423    
## Feature status of the XHTML2 common attributes, keyed by attribute
## local name and grouped by XHTML2 attribute collection.
my %XHTML2CommonAttrStatus = (
  ## Core
  class      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
  id         => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
  #xml:id
  layout     => FEATURE_XHTML2_ED,
  title      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,

  ## Hypertext
  cite       => FEATURE_XHTML2_ED,
  href       => FEATURE_XHTML2_ED,
  hreflang   => FEATURE_XHTML2_ED,
  hrefmedia  => FEATURE_XHTML2_ED,
  hreftype   => FEATURE_XHTML2_ED,
  nextfocus  => FEATURE_XHTML2_ED,
  prevfocus  => FEATURE_XHTML2_ED,
  target     => FEATURE_XHTML2_ED,
  #xml:base

  ## I18N
  #xml:lang

  ## Bi-directional
  dir        => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,

  ## Edit
  edit       => FEATURE_XHTML2_ED,
  datetime   => FEATURE_XHTML2_ED,

  ## Embedding
  encoding   => FEATURE_XHTML2_ED,
  src        => FEATURE_XHTML2_ED,
  srctype    => FEATURE_XHTML2_ED,

  ## Image Map
  usemap     => FEATURE_XHTML2_ED,
  ismap      => FEATURE_XHTML2_ED,
  shape      => FEATURE_XHTML2_ED,
  coords     => FEATURE_XHTML2_ED,

  ## Media
  media      => FEATURE_XHTML2_ED,

  ## Metadata
  about      => FEATURE_XHTML2_ED,
  content    => FEATURE_XHTML2_ED,
  datatype   => FEATURE_XHTML2_ED,
  instanceof => FEATURE_XHTML2_ED,
  property   => FEATURE_XHTML2_ED,
  rel        => FEATURE_XHTML2_ED,
  resource   => FEATURE_XHTML2_ED,
  rev        => FEATURE_XHTML2_ED,

  ## Role
  role       => FEATURE_XHTML2_ED,

  ## Style
  style      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED, # "strongly discouraged"
);
1483    
## Union of |%HTMLM12NCommonAttrStatus| and |%XHTML2CommonAttrStatus|.
##
## The two tables are copied in first.  The explicit entries after
## them replace the copied value for attributes listed in both, so
## the order of this list matters.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  about      => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  class      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  content    => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  datatype   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  dir        => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  href       => FEATURE_RDFA_REC,
  id         => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  instanceof => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,
  property   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  rel        => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  resource   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  rev        => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  #style     => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #             FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  style      => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
                FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  title      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  typeof     => FEATURE_RDFA_REC,
);
1507    
## Register the event handler checker for the event handler content
## attributes, with the default HTML5 status.
for my $event_attr (qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onstorage onsubmit onunload
/) {
  $HTMLAttrChecker->{$event_attr} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$event_attr} = FEATURE_HTML5_DEFAULT;
}

## The same checker applies to these two attributes, but their status
## is "dropped".
for my $dropped_attr (qw/
  ondataunavailable
  onmessage
/) {
  $HTMLAttrChecker->{$dropped_attr} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$dropped_attr} = FEATURE_HTML5_DROPPED;
}
1527    
## NOTE: Non-standard global attributes in the HTML namespace.
##
## The '' key is the fallback entry used for attribute names that
## have no entry of their own.
$AttrChecker->{$HTML_NS}->{''} = sub {}; # no syntactical checks
$AttrStatus->{$HTML_NS}->{''} = 0; # disallowed and not part of any standard

$AttrStatus->{$HTML_NS}->{active} = FEATURE_HTML5_DROPPED;

## Web Forms 2.0 repetition attributes share the global checkers.
for my $wf2_attr (qw/repeat repeat-max repeat-min repeat-start
                     repeat-template/) {
  $AttrChecker->{$HTML_NS}->{$wf2_attr} = $HTMLAttrChecker->{$wf2_attr};
  $AttrStatus->{$HTML_NS}->{$wf2_attr} = FEATURE_WF2;
}

## RDFa and role attributes.
for my $rdfa_attr (qw/about content datatype property rel resource rev/) {
  $AttrStatus->{$HTML_NS}->{$rdfa_attr}
      = FEATURE_RDFA_REC | FEATURE_XHTML2_ED;
}
$AttrStatus->{$HTML_NS}->{instanceof}
    = FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED;
$AttrStatus->{$HTML_NS}->{typeof} = FEATURE_RDFA_REC;
$AttrStatus->{$HTML_NS}->{role} = FEATURE_ROLE_LC;

## Attributes that only have the XHTML2 status.
for my $xhtml2_attr (qw/cite coords datetime edit encoding href hreflang
                        hrefmedia hreftype ismap layout media nextfocus
                        prevfocus shape src srctype style target usemap/) {
  $AttrStatus->{$HTML_NS}->{$xhtml2_attr} = FEATURE_XHTML2_ED;
}

for my $core_attr (qw/class dir id title/) {
  $AttrStatus->{$HTML_NS}->{$core_attr}
      = FEATURE_M12N11_LC | FEATURE_XHTML2_ED;
}

for my $intrinsic_event_attr (qw/onclick ondblclick onmousedown onmouseup
                                 onmouseover onmousemove onmouseout
                                 onkeypress onkeydown onkeyup/) {
  $AttrStatus->{$HTML_NS}->{$intrinsic_event_attr} = FEATURE_M12N11_LC;
}
1556    
## Checker for |data-*| attributes.  It performs no check and reports
## nothing.
my $HTMLDatasetAttrChecker = sub {
  ## NOTE: "Authors should ... when the attributes are ignored and
  ## any associated CSS dropped, the page is still usable."  This is
  ## a semantic constraint.
}; # $HTMLDatasetAttrChecker
1562    
1563 wakaba 1.153 my $HTMLDatasetAttrStatus = FEATURE_HTML5_WD;
1564 wakaba 1.73
## Builds a |check_attrs| callback for an element.
##
## Arguments:
##   $element_specific_checker - hash reference mapping null-namespace
##     attribute local names to checker code references.
##   $element_specific_status - hash reference mapping null-namespace
##     attribute local names to status flags.
##
## Returns a code reference taking |($self, $item, $element_state)| that
## walks every attribute of |$item->{node}|, runs the selected checker
## (if any) and reports the attribute status via |_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;
  return sub {
    my ($self, $item, $element_state) = @_;
    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;
      my ($checker, $status);

      if ($ns eq '') {
        if ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
            $ln !~ /[A-Z]/) {
          ## |data-*| attribute without uppercase ASCII letters.
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          ## Element-specific checker first, then the global HTML one.
          $checker = $element_specific_checker->{$ln}
              || $HTMLAttrChecker->{$ln};
          $status = $element_specific_status->{$ln};
        }
      }

      ## Fall back to the per-namespace tables, then to the namespace's
      ## default (empty local name) entries.
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($ns ne '' or $element_specific_status->{$ln}) {
        ## No checker is available, and the attribute is either in a
        ## non-null namespace or has an element-specific status.
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }
      ## Otherwise (null namespace, no element-specific status): no
      ## error is raised here.

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1606    
## Base set of callbacks shared by the HTML element definitions: the
## generic |AnyChecker| callbacks, with |check_start| and |check_attrs|
## replaced by HTML-specific versions.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Mark the |template| and |ref| URI attributes as referring to
    ## resources.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  ## Only global attributes; no element-specific checkers.
  check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
);
1617    
1618     my %HTMLEmptyChecker = (
1619     %HTMLChecker,
1620     check_child_element => sub {
1621     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1622     $child_is_transparent, $element_state) = @_;
1623 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1624     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1625 wakaba 1.40 $self->{onerror}->(node => $child_el,
1626     type => 'element not allowed:minus',
1627 wakaba 1.104 level => $self->{level}->{must});
1628 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1629     #
1630     } else {
1631     $self->{onerror}->(node => $child_el,
1632     type => 'element not allowed:empty',
1633 wakaba 1.104 level => $self->{level}->{must});
1634 wakaba 1.40 }
1635     },
1636     check_child_text => sub {
1637     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1638     if ($has_significant) {
1639     $self->{onerror}->(node => $child_node,
1640     type => 'character not allowed:empty',
1641 wakaba 1.104 level => $self->{level}->{must});
1642 wakaba 1.40 }
1643     },
1644     );
1645    
1646     my %HTMLTextChecker = (
1647     %HTMLChecker,
1648     check_child_element => sub {
1649     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1650     $child_is_transparent, $element_state) = @_;
1651 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1652     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1653 wakaba 1.40 $self->{onerror}->(node => $child_el,
1654     type => 'element not allowed:minus',
1655 wakaba 1.104 level => $self->{level}->{must});
1656 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1657     #
1658     } else {
1659 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed:text',
1660     level => $self->{level}->{must});
1661 wakaba 1.40 }
1662     },
1663     );
1664    
## Callbacks for elements whose content model is flow content.
##
## |$element_state->{has_non_style}| records that something other than
## a |style| element has already been seen, after which |style|
## children are reported as errors.
my %HTMLFlowContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $report = $self->{onerror};
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Listed in |minus_elements| and interactive content.
      $report->(node => $child_el,
                type => 'element not allowed:minus',
                level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## A |style| child is accepted only if it has the |scoped|
      ## attribute and no non-|style| content precedes it.
      unless (not $element_state->{has_non_style} and
              $child_el->has_attribute_ns (undef, 'scoped')) {
        $report->(node => $child_el,
                  type => 'element not allowed:flow style',
                  level => $self->{level}->{must});
      }
    } elsif ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
      ## Transparent children do not count as non-|style| content.
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
    } else {
      $element_state->{has_non_style} = 1;
      $report->(node => $child_el,
                type => 'element not allowed:flow',
                level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: A modified copy of the code below is in |datagrid| checker.
    if ($element_state->{has_significant}) {
      ## Propagate the "has significant content" state to the parent.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      ## No significant content in a non-transparent element.
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
);
1713    
1714     my %HTMLPhrasingContentChecker = (
1715     %HTMLChecker,
1716     check_child_element => sub {
1717     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1718     $child_is_transparent, $element_state) = @_;
1719 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1720     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1721 wakaba 1.40 $self->{onerror}->(node => $child_el,
1722     type => 'element not allowed:minus',
1723 wakaba 1.104 level => $self->{level}->{must});
1724 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1725     #
1726     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
1727     #
1728     } else {
1729     $self->{onerror}->(node => $child_el,
1730     type => 'element not allowed:phrasing',
1731 wakaba 1.104 level => $self->{level}->{must});
1732 wakaba 1.40 }
1733     },
1734 wakaba 1.72 check_end => $HTMLFlowContentChecker{check_end},
1735 wakaba 1.40 ## NOTE: The definition for |li| assumes that the only differences
1736 wakaba 1.72 ## between flow and phrasing content checkers are |check_child_element|
1737 wakaba 1.40 ## and |check_child_text|.
1738     );
1739    
## Transparent elements use a copy of the flow content callbacks.
my %HTMLTransparentChecker = %HTMLFlowContentChecker;
  ## ISSUE: Should the significant content rule be applied to a
  ## transparent element with a parent?

## Element definition tables.
our $Element;
our $ElementDefault;

## Default definition (empty local name) for elements in the HTML
## namespace.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
1750    
1751     $Element->{$HTML_NS}->{html} = {
1752 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
1753 wakaba 1.1 is_root => 1,
1754 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
1755 wakaba 1.16 manifest => $HTMLURIAttrChecker,
1756 wakaba 1.67 version => sub {
1757     ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
1758     ## Though DTDs of various versions of HTML define the attribute
1759     ## as |#FIXED|, this conformance checker does no check for
1760     ## the attribute value, since what kind of check should be done
1761     ## is unknown.
1762     },
1763 wakaba 1.49 }, {
1764     %HTMLAttrStatus,
1765 wakaba 1.82 %XHTML2CommonAttrStatus,
1766 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
1767     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
1768     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
1769     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
1770     manifest => FEATURE_HTML5_WD,
1771 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
1772 wakaba 1.82 version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
1773 wakaba 1.1 }),
1774 wakaba 1.40 check_start => sub {
1775     my ($self, $item, $element_state) = @_;
1776     $element_state->{phase} = 'before head';
1777 wakaba 1.79
1778 wakaba 1.66 $element_state->{uri_info}->{manifest}->{type}->{resource} = 1;
1779 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
1780     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
1781 wakaba 1.40 },
1782     check_child_element => sub {
1783     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1784     $child_is_transparent, $element_state) = @_;
1785 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1786     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1787 wakaba 1.40 $self->{onerror}->(node => $child_el,
1788     type => 'element not allowed:minus',
1789 wakaba 1.104 level => $self->{level}->{must});
1790 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1791     #
1792     } elsif ($element_state->{phase} eq 'before head') {
1793     if ($child_nsuri eq $HTML_NS and $child_ln eq 'head') {
1794     $element_state->{phase} = 'after head';
1795     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
1796     $self->{onerror}->(node => $child_el,
1797 wakaba 1.104 type => 'ps element missing',
1798     text => 'head',
1799     level => $self->{level}->{must});
1800 wakaba 1.40 $element_state->{phase} = 'after body';
1801     } else {
1802     $self->{onerror}->(node => $child_el,
1803 wakaba 1.104 type => 'element not allowed',
1804     level => $self->{level}->{must});
1805 wakaba 1.40 }
1806     } elsif ($element_state->{phase} eq 'after head') {
1807     if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
1808     $element_state->{phase} = 'after body';
1809     } else {
1810     $self->{onerror}->(node => $child_el,
1811 wakaba 1.104 type => 'element not allowed',
1812     level => $self->{level}->{must});
1813 wakaba 1.40 }
1814     } elsif ($element_state->{phase} eq 'after body') {
1815     $self->{onerror}->(node => $child_el,
1816 wakaba 1.104 type => 'element not allowed',
1817     level => $self->{level}->{must});
1818 wakaba 1.40 } else {
1819     die "check_child_element: Bad |html| phase: $element_state->{phase}";
1820     }
1821     },
1822     check_child_text => sub {
1823     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1824     if ($has_significant) {
1825     $self->{onerror}->(node => $child_node,
1826 wakaba 1.104 type => 'character not allowed',
1827     level => $self->{level}->{must});
1828 wakaba 1.40 }
1829     },
1830     check_end => sub {
1831     my ($self, $item, $element_state) = @_;
1832     if ($element_state->{phase} eq 'after body') {
1833     #
1834     } elsif ($element_state->{phase} eq 'before head') {
1835     $self->{onerror}->(node => $item->{node},
1836 wakaba 1.104 type => 'child element missing',
1837     text => 'head',
1838     level => $self->{level}->{must});
1839 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1840 wakaba 1.104 type => 'child element missing',
1841     text => 'body',
1842     level => $self->{level}->{must});
1843 wakaba 1.40 } elsif ($element_state->{phase} eq 'after head') {
1844     $self->{onerror}->(node => $item->{node},
1845 wakaba 1.104 type => 'child element missing',
1846     text => 'body',
1847     level => $self->{level}->{must});
1848 wakaba 1.40 } else {
1849     die "check_end: Bad |html| phase: $element_state->{phase}";
1850     }
1851 wakaba 1.1
1852 wakaba 1.40 $HTMLChecker{check_end}->(@_);
1853     },
1854     };
1855 wakaba 1.25
## Definition of the |head| element.
##
## Content model: metadata content, with at most one |title| (exactly
## one is required at the end of the element) and no |style| element
## carrying the |scoped| attribute.
##
## While the element has children, |$self->{flag}->{in_head}| is set to
## true; the value it had before is kept in
## |$element_state->{in_head_original}| and restored by |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| child is accepted.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## A |style| element with |scoped| is not allowed in |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{level}->{must});
    }

    ## Remember the value of the |in_head| flag only once per |head|
    ## element.  This callback runs for every child element; saving the
    ## flag unconditionally would, from the second child on, store the
    ## value 1 set below, and |check_end| would then leave the flag set
    ## after the element instead of restoring the original value.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'title',
                         level => $self->{level}->{must});
    }
    ## Restore the flag saved by |check_child_element| (undefined if the
    ## element had no child element, as before).
    $self->{flag}->{in_head} = $element_state->{in_head_original};

    $HTMLChecker{check_end}->(@_);
  },
};
1930    
## Definition of the |title| element: text-only content, global
## attributes only.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
      dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
1944 wakaba 1.1
## Definition of the |base| element (empty content model).
##
## |check_attrs| performs the document-level checks first (only one
## |base| per document; |href| / |target| must precede URL attributes /
## hyperlinks; at least one of |href| and |target| is required) and then
## runs the ordinary per-attribute checks.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};
    my $report = $self->{onerror};
    my $must = $self->{level}->{must};

    ## Only the first |base| element seen by this checker is allowed.
    if ($self->{has_base}) {
      $report->(node => $el,
                type => 'element not allowed:base',
                level => $must);
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $el->has_attribute_ns (undef, 'href');
    my $has_target = $el->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $report->(node => $el,
                type => 'basehref after URL attribute',
                level => $must);
    }
    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $report->(node => $el,
                type => 'basetarget after hyperlink',
                level => $must);
    }

    unless ($has_href or $has_target) {
      $report->(node => $el,
                type => 'attribute missing:href|target',
                level => $must);
    }

    ## Mark the |href| URI attribute as a base URL.
    $element_state->{uri_info}->{href}->{type}->{base} = 1;

    my %attr_checker = (
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    );
    my %attr_status = (
      %HTMLAttrStatus,
      href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    );
    return $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status)
        ->($self, $item, $element_state);
  },
};
2009    
## Definition of the |link| element (empty content model).
##
## |check_attrs| runs the per-attribute checks and then the checks that
## depend on combinations of attributes: |href| and |rel| are required,
## |sizes| is reported unless |$element_state->{link_rel}->{icon}| is
## set, and a link with both |alternate| and |stylesheet| set in
## |$element_state->{link_rel}| needs a non-empty |title|.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $sizes_attr; # set by the |sizes| checker when the attribute exists

    my %attr_checker = (
      charset => sub {
        my ($self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      sizes => sub {
        my ($self, $attr) = @_;
        $sizes_attr = $attr;
        ## Space-separated tokens; each must be unique and be either
        ## |any| or of the form WIDTHxHEIGHT without leading zeros.
        my %seen;
        for my $token (grep {length $_}
                       split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
          if ($seen{$token}++) {
            $self->{onerror}->(node => $attr, type => 'duplicate token',
                               value => $token,
                               level => $self->{level}->{must});
          } elsif ($token eq 'any' or
                   $token =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/) {
            ## Well-formed token.
          } else {
            $self->{onerror}->(node => $attr,
                               type => 'sizes:syntax error',
                               value => $token,
                               level => $self->{level}->{must});
          }
        }
      },
      target => $HTMLTargetAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    );

    my %attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
      ## NOTE: |charset| attribute had been part of HTML5 spec though
      ## it had been commented out.
      href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      methods => FEATURE_HTML20_RFC,
      rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      sizes => FEATURE_HTML5_LC,
      target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      # title: HTML5_WD | HTML5_LC | ...
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    );

    $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status)
        ->($self, $item, $element_state);

    my $el = $item->{node};
    my $report = $self->{onerror};
    my $must = $self->{level}->{must};

    if ($el->has_attribute_ns (undef, 'href')) {
      ## Record that a hyperlink element has been seen (consulted by
      ## the |base| element's |target| check).
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    } else {
      $report->(node => $el,
                type => 'attribute missing',
                text => 'href',
                level => $must);
    }

    unless ($el->has_attribute_ns (undef, 'rel')) {
      $report->(node => $el,
                type => 'attribute missing',
                text => 'rel',
                level => $must);
    }

    if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
      $report->(node => $sizes_attr,
                type => 'attribute not allowed',
                level => $must);
    }

    if ($element_state->{link_rel}->{alternate} and
        $element_state->{link_rel}->{stylesheet}) {
      ## Alternative style sheets require a non-empty |title|.
      my $title_attr = $el->get_attribute_node_ns (undef, 'title');
      if (not $title_attr) {
        $report->(node => $el,
                  type => 'attribute missing',
                  text => 'title',
                  level => $must);
      } elsif ($title_attr->value eq '') {
        $report->(node => $title_attr,
                  type => 'empty style sheet title',
                  level => $must);
      }
    }
  },
};
2114    
2115     $Element->{$HTML_NS}->{meta} = {
2116 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2117 wakaba 1.40 %HTMLEmptyChecker,
2118     check_attrs => sub {
2119     my ($self, $item, $element_state) = @_;
2120 wakaba 1.1 my $name_attr;
2121     my $http_equiv_attr;
2122     my $charset_attr;
2123     my $content_attr;
2124 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2125 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2126     $attr_ns = '' unless defined $attr_ns;
2127     my $attr_ln = $attr->manakai_local_name;
2128     my $checker;
2129 wakaba 1.73 my $status;
2130 wakaba 1.1 if ($attr_ns eq '') {
2131 wakaba 1.73 $status = {
2132     %HTMLAttrStatus,
2133 wakaba 1.82 %XHTML2CommonAttrStatus,
2134 wakaba 1.153 charset => FEATURE_HTML5_WD,
2135     content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2136     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2137     'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2138     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
2139     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2140     name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2141 wakaba 1.73 scheme => FEATURE_M12N10_REC,
2142     }->{$attr_ln};
2143    
2144 wakaba 1.1 if ($attr_ln eq 'content') {
2145     $content_attr = $attr;
2146     $checker = 1;
2147     } elsif ($attr_ln eq 'name') {
2148     $name_attr = $attr;
2149     $checker = 1;
2150     } elsif ($attr_ln eq 'http-equiv') {
2151     $http_equiv_attr = $attr;
2152     $checker = 1;
2153     } elsif ($attr_ln eq 'charset') {
2154     $charset_attr = $attr;
2155     $checker = 1;
2156 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
2157 wakaba 1.71 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2158 wakaba 1.67 $checker = sub {};
2159 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2160     $attr_ln !~ /[A-Z]/) {
2161 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
2162     $status = $HTMLDatasetAttrStatus;
2163 wakaba 1.1 } else {
2164     $checker = $HTMLAttrChecker->{$attr_ln}
2165 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
2166 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
2167     }
2168     } else {
2169     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2170 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
2171     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2172     || $AttrStatus->{$attr_ns}->{''};
2173     $status = FEATURE_ALLOWED if not defined $status;
2174 wakaba 1.1 }
2175 wakaba 1.62
2176 wakaba 1.1 if ($checker) {
2177 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
2178 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
2179 wakaba 1.54 #
2180 wakaba 1.1 } else {
2181 wakaba 1.104 $self->{onerror}->(node => $attr,
2182     type => 'unknown attribute',
2183     level => $self->{level}->{uncertain});
2184 wakaba 1.49 ## ISSUE: No conformance createria for unknown attributes in the spec
2185     }
2186    
2187 wakaba 1.82 $self->_attr_status_info ($attr, $status);
2188 wakaba 1.1 }
2189    
2190     if (defined $name_attr) {
2191     if (defined $http_equiv_attr) {
2192     $self->{onerror}->(node => $http_equiv_attr,
2193 wakaba 1.104 type => 'attribute not allowed',
2194     level => $self->{level}->{must});
2195 wakaba 1.1 } elsif (defined $charset_attr) {
2196     $self->{onerror}->(node => $charset_attr,
2197 wakaba 1.104 type => 'attribute not allowed',
2198     level => $self->{level}->{must});
2199 wakaba 1.1 }
2200     my $metadata_name = $name_attr->value;
2201     my $metadata_value;
2202     if (defined $content_attr) {
2203     $metadata_value = $content_attr->value;
2204     } else {
2205 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2206 wakaba 1.104 type => 'attribute missing',
2207     text => 'content',
2208     level => $self->{level}->{must});
2209 wakaba 1.1 $metadata_value = '';
2210     }
2211     } elsif (defined $http_equiv_attr) {
2212     if (defined $charset_attr) {
2213     $self->{onerror}->(node => $charset_attr,
2214 wakaba 1.104 type => 'attribute not allowed',
2215     level => $self->{level}->{must});
2216 wakaba 1.1 }
2217     unless (defined $content_attr) {
2218 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2219 wakaba 1.104 type => 'attribute missing',
2220     text => 'content',
2221     level => $self->{level}->{must});
2222 wakaba 1.1 }
2223     } elsif (defined $charset_attr) {
2224     if (defined $content_attr) {
2225     $self->{onerror}->(node => $content_attr,
2226 wakaba 1.104 type => 'attribute not allowed',
2227     level => $self->{level}->{must});
2228 wakaba 1.1 }
2229     } else {
2230     if (defined $content_attr) {
2231     $self->{onerror}->(node => $content_attr,
2232 wakaba 1.104 type => 'attribute not allowed',
2233     level => $self->{level}->{must});
2234 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2235 wakaba 1.104 type => 'attribute missing:name|http-equiv',
2236     level => $self->{level}->{must});
2237 wakaba 1.1 } else {
2238 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2239 wakaba 1.104 type => 'attribute missing:name|http-equiv|charset',
2240     level => $self->{level}->{must});
2241 wakaba 1.1 }
2242     }
2243    
2244 wakaba 1.32 my $check_charset_decl = sub () {
2245 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
2246 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2247     for my $el (@{$parent->child_nodes}) {
2248     next unless $el->node_type == 1; # ELEMENT_NODE
2249 wakaba 1.40 unless ($el eq $item->{node}) {
2250 wakaba 1.29 ## NOTE: Not the first child element.
2251 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2252 wakaba 1.32 type => 'element not allowed:meta charset',
2253 wakaba 1.104 level => $self->{level}->{must});
2254 wakaba 1.29 }
2255     last;
2256     ## NOTE: Entity references are not supported.
2257     }
2258     } else {
2259 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2260 wakaba 1.32 type => 'element not allowed:meta charset',
2261 wakaba 1.104 level => $self->{level}->{must});
2262 wakaba 1.29 }
2263 wakaba 1.32 }; # $check_charset_decl
2264 wakaba 1.21
2265 wakaba 1.32 my $check_charset = sub ($$) {
2266     my ($attr, $charset_value) = @_;
2267 wakaba 1.21
2268 wakaba 1.91 my $charset;
2269     ($charset, $charset_value)
2270     = $HTMLCharsetChecker->($charset_value, $self, $attr);
2271    
2272 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
2273 wakaba 1.21 if (defined $ic) {
2274     ## TODO: Test for this case
2275     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2276     if ($charset ne $ic_charset) {
2277 wakaba 1.32 $self->{onerror}->(node => $attr,
2278 wakaba 1.104 type => 'mismatched charset name',
2279 wakaba 1.106 text => $ic,
2280 wakaba 1.104 value => $charset_value,
2281     level => $self->{level}->{must});
2282 wakaba 1.21 }
2283     } else {
2284     ## NOTE: MUST, but not checkable, since the document is not originally
2285     ## in serialized form (or the parser does not preserve the input
2286     ## encoding information).
2287 wakaba 1.32 $self->{onerror}->(node => $attr,
2288 wakaba 1.104 type => 'mismatched charset name not checked',
2289     value => $charset_value,
2290     level => $self->{level}->{uncertain});
2291 wakaba 1.21 }
2292    
2293 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
2294     $self->{onerror}->(node => $attr,
2295 wakaba 1.104 type => 'charref in charset',
2296     level => $self->{level}->{must},
2297     layer => 'syntax');
2298 wakaba 1.22 }
2299 wakaba 1.32 }; # $check_charset
2300    
2301     ## TODO: metadata conformance
2302    
2303     ## TODO: pragma conformance
2304     if (defined $http_equiv_attr) { ## An enumerated attribute
2305     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
2306 wakaba 1.33
2307 wakaba 1.85 if ($self->{has_http_equiv}->{$keyword}) {
2308     $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2309     node => $http_equiv_attr,
2310 wakaba 1.104 level => $self->{level}->{must});
2311 wakaba 1.85 } else {
2312     $self->{has_http_equiv}->{$keyword} = 1;
2313     }
2314    
2315     if ($keyword eq 'content-type') {
2316 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2317 wakaba 1.33
2318 wakaba 1.32 $check_charset_decl->();
2319 wakaba 1.182
2320     unless ($item->{node}->owner_document->manakai_is_html) {
2321     $self->{onerror}->(node => $item->{node},
2322     type => 'in XML:charset',
2323     level => $self->{level}->{must});
2324     }
2325    
2326 wakaba 1.32 if ($content_attr) {
2327     my $content = $content_attr->value;
2328 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2329 wakaba 1.132 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2330 wakaba 1.58 =(.+)\z!sx) {
2331 wakaba 1.32 $check_charset->($content_attr, $1);
2332     } else {
2333     $self->{onerror}->(node => $content_attr,
2334     type => 'meta content-type syntax error',
2335 wakaba 1.104 level => $self->{level}->{must});
2336 wakaba 1.85 }
2337     }
2338     } elsif ($keyword eq 'default-style') {
2339     ## ISSUE: Not defined yet in the spec.
2340     } elsif ($keyword eq 'refresh') {
2341     if ($content_attr) {
2342     my $content = $content_attr->value;
2343     if ($content =~ /\A[0-9]+\z/) {
2344     ## NOTE: Valid non-negative integer.
2345     #
2346 wakaba 1.132 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2347 wakaba 1.85 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
2348     Whatpm::URIChecker->check_iri_reference ($content, sub {
2349 wakaba 1.104 $self->{onerror}->(value => $content, @_, node => $content_attr);
2350 wakaba 1.106 }, $self->{level});
2351 wakaba 1.85 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URIs".
2352    
2353     $element_state->{uri_info}->{content}->{node} = $content_attr;
2354     $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2355     ## TODO: absolute
2356     push @{$self->{return}->{uri}->{$content} ||= []},
2357     $element_state->{uri_info}->{content};
2358     } else {
2359     $self->{onerror}->(node => $content_attr,
2360     type => 'refresh:syntax error',
2361 wakaba 1.104 level => $self->{level}->{must});
2362 wakaba 1.32 }
2363     }
2364     } else {
2365     $self->{onerror}->(node => $http_equiv_attr,
2366 wakaba 1.104 type => 'enumerated:invalid',
2367     level => $self->{level}->{must});
2368 wakaba 1.32 }
2369     }
2370    
2371     if (defined $charset_attr) {
2372 wakaba 1.182 my $value = $charset_attr->value;
2373    
2374 wakaba 1.32 $check_charset_decl->();
2375 wakaba 1.182 $check_charset->($charset_attr, $value);
2376    
2377     if (not $item->{node}->owner_document->manakai_is_html and
2378     not $value =~ /\A[Uu][Tt][Ff]-8\z/) {
2379     $self->{onerror}->(node => $item->{node},
2380     type => 'in XML:charset',
2381     level => $self->{level}->{must});
2382     }
2383 wakaba 1.1 }
2384     },
2385     };
2386    
## |html:style|: the element's text is accumulated while its children are
## checked and handed to |onsubdoc| at the end of the element, labelled with
## the media type taken from |type=""| (|text/css| when absent).
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type   => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media  => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    dir      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    disabled => FEATURE_XHTML2_ED,
    href     => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
    id       => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    media    => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    scoped   => FEATURE_HTML5_FD,
    title    => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    type     => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $media_type = $item->{node}->get_attribute_ns (undef, 'type');
    $media_type = 'text/css' unless defined $media_type;
    if ($media_type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      ## Normalize to "type/subtype" without surrounding white space.
      $media_type = "$1/$2";
      $media_type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
    } else {
      ## NOTE: We don't know how parameters are handled by UAs.  According to
      ## the HTML5 specification, <style> with unknown parameters in
      ## |type=""| must be ignored.
      undef $media_type;
    }

    ## Child elements are rejected only for a recognized |text/css| sheet;
    ## an invalid or unknown |type=""| lets any element through.
    ## NOTE: XML media types are not special-cased here, since there is no
    ## definition for "XML-based styling language" in HTML5.
    $element_state->{allow_element}
        = (defined $media_type and $media_type eq 'text/css') ? 0 : 1;
    $element_state->{style_type} = $media_type;

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;

    ## Buffer filled by |check_child_text|.
    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child, $child_ns, $child_name,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_ns}->{$child_name} and
        $IsInHTMLInteractiveContent->($child, $child_ns, $child_name)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_ns}->{$child_name}) {
      ## Explicitly allowed by an ancestor.
      #
    } elsif ($element_state->{allow_element}) {
      ## Style language is not |text/css|: content model is not checked.
      #
    } else {
      $self->{onerror}->(node => $child, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $text_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $media_type = $element_state->{style_type};
    if (defined $media_type) {
      if ($media_type =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.  However, we don't know of any
        ## XML-based styling language that can be used in HTML <style>
        ## element, such that we throw a "style language not supported"
        ## error.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML style lang',
                           text => $media_type,
                           level => $self->{level}->{uncertain});
      } else {
        ## |text/css| as well as any other non-XML media type: the collected
        ## text is passed on as a subdocument of that type.
        ## NOTE: Should we raise some kind of error for,
        ## say, <style type="text/plaion">?
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $media_type,
                             is_char_string => 1});
      }
    } # else: invalid type="", nothing to hand over

    $HTMLChecker{check_end}->(@_);
  },
};
2492 wakaba 1.25 ## ISSUE: Relationship to significant content check?
2493 wakaba 1.1
## |html:body|: flow content, plus the presentational color and background
## attributes, which are syntax-checked here and carry a deprecated status.
$Element->{$HTML_NS}->{body} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    alink      => $HTMLColorAttrChecker,
    background => $HTMLURIAttrChecker,
    bgcolor    => $HTMLColorAttrChecker,
    link       => $HTMLColorAttrChecker,
    text       => $HTMLColorAttrChecker,
    vlink      => $HTMLColorAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    alink      => FEATURE_M12N10_REC_DEPRECATED,
    background => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor    => FEATURE_M12N10_REC_DEPRECATED,
    lang       => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    link       => FEATURE_M12N10_REC_DEPRECATED,
    onload     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onunload   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    text       => FEATURE_M12N10_REC_DEPRECATED,
    vlink      => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Classify the URI-valued attributes of this element.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{background}->{type}->{embedded} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
2525    
## |html:section|: flow content; no element-specific attributes.
$Element->{$HTML_NS}->{section} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {%HTMLAttrStatus, %XHTML2CommonAttrStatus},
  ),
};
2535    
## |html:nav|: flow content with the default attribute checks.
$Element->{$HTML_NS}->{nav} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
};
2540    
## |html:article|: flow content; |pubdate| must be a global date and time
## string.
$Element->{$HTML_NS}->{article} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    pubdate => $GetDateTimeAttrChecker->('global_date_and_time_string'),
  }, {
    %HTMLAttrStatus,
    # XXX cite
    pubdate => FEATURE_HTML5_LC,
  }),
}; # article
2552 wakaba 1.1
## |html:blockquote|: flow content; |cite| is checked as a URI and recorded
## as a citation link.
$Element->{$HTML_NS}->{blockquote} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLFlowContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_HTML2X_RFC,
    cite    => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Classify the URI-valued attributes of this element.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{cite}->{type}->{cite} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
2574    
## |html:aside|: flow content with the default attribute checks.
$Element->{$HTML_NS}->{aside} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
};
2579    
## |html:h1|: phrasing content.  Seeing a heading sets the |has_hn| flag on
## the checker (inspected by |html:header| at its end).
$Element->{$HTML_NS}->{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record that a heading element has been encountered.
    $self->{flag}->{has_hn} = 1;

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
2602    
## |html:h2| .. |html:h6| are checked exactly like |html:h1|; each one gets
## its own shallow copy of the |h1| definition.
for my $heading_name (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}->{$heading_name} = {%{$Element->{$HTML_NS}->{h1}}};
}
2612 wakaba 1.1
2613 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
2614 wakaba 1.174
2615     # XXX footer in header is disallowed (HTML5 revision 3050)
2616 wakaba 1.29
## |html:header|: flow content, excluding nested |header|/|footer| and
## sectioning content, and requiring at least one heading inside (tracked
## through the checker-wide |has_hn| flag).
$Element->{$HTML_NS}->{header} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {qw/header 1 footer 1/}},
                                $HTMLSectioningContent);

    ## Save the outer value of the flag and start counting afresh, so that
    ## |check_end| only sees headings found inside this element.
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    if (not $self->{flag}->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn',
                         level => $self->{level}->{must});
    }
    ## Merge the saved outer value back into the flag.
    $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
2645    
## |html:footer|: flow content, excluding |header|, |footer|, sectioning
## content and heading content for the duration of the element.
$Element->{$HTML_NS}->{footer} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $excluded = {$HTML_NS => {header => 1, footer => 1}};
    $self->_add_minus_elements ($element_state, $excluded,
                                $HTMLSectioningContent,
                                $HTMLHeadingContent);

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusions registered in |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2666    
## |html:address|: flow content, excluding |header|, |footer|, nested
## |address|, sectioning content and heading content.
$Element->{$HTML_NS}->{address} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: add test
    #align => $GetHTMLEnumeratedAttrChecker->({
    #  left => 1, center => 1, right => 1, justify => 1,
    #}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_HTML2X_RFC,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $excluded = {$HTML_NS => {header => 1, footer => 1, address => 1}};
    $self->_add_minus_elements
        ($element_state, $excluded,
         $HTMLSectioningContent, $HTMLHeadingContent);

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusions registered in |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2700    
## |html:p|: phrasing content; the deprecated |align| attribute is still
## checked against its enumerated keywords.
$Element->{$HTML_NS}->{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};
2716    
## |html:hr|: empty element.  The HTML4 presentational attributes only have
## a status entry; their values are not checked yet.
$Element->{$HTML_NS}->{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: HTML4 |align|, |noshade|, |size|, |width|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align   => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    noshade => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    size    => FEATURE_M12N10_REC_DEPRECATED,
    width   => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
2733    
## |html:br|: empty element; the deprecated |clear| attribute is checked
## against its enumerated keywords.
$Element->{$HTML_NS}->{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    clear => $GetHTMLEnumeratedAttrChecker->({
      left => 1, all => 1, right => 1, none => 1,
    }),
  }, {
    %HTMLAttrStatus,
    class   => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    clear   => FEATURE_M12N10_REC_DEPRECATED,
    id      => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sdapref => FEATURE_HTML20_RFC,
    style   => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    title   => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  ## NOTE: Blank line MUST NOT be used for presentation purpose.
  ## (This requirement is semantic so that we cannot check.)
};
2753    
## |html:dialog|: a strict alternation of |dt| and |dd| children.  The
## expected next child is tracked in |$element_state->{phase}|, which is
## either 'before dt' or 'before dd'.
$Element->{$HTML_NS}->{dialog} = {
  status => FEATURE_HTML5_WD,
  %HTMLChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## The first child has to be a |dt|.
    $element_state->{phase} = 'before dt';

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child, $child_ns, $child_name,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_ns}->{$child_name} and
        $IsInHTMLInteractiveContent->($child, $child_ns, $child_name)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_ns}->{$child_name}) {
      ## Explicitly allowed by an ancestor.
      #
    } elsif ($element_state->{phase} eq 'before dt') {
      if ($child_ns eq $HTML_NS and $child_name eq 'dt') {
        $element_state->{phase} = 'before dd';
      } elsif ($child_ns eq $HTML_NS and $child_name eq 'dd') {
        ## A |dd| without its |dt|: report it and keep waiting for a |dt|.
        $self->{onerror}->(node => $child,
                           type => 'ps element missing',
                           text => 'dt',
                           level => $self->{level}->{must});
        $element_state->{phase} = 'before dt';
      } else {
        $self->{onerror}->(node => $child,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'before dd') {
      if ($child_ns eq $HTML_NS and $child_name eq 'dd') {
        $element_state->{phase} = 'before dt';
      } elsif ($child_ns eq $HTML_NS and $child_name eq 'dt') {
        ## Two |dt|s in a row: report it and keep waiting for a |dd|.
        $self->{onerror}->(node => $child,
                           type => 'ps element missing',
                           text => 'dd',
                           level => $self->{level}->{must});
        $element_state->{phase} = 'before dd';
      } else {
        $self->{onerror}->(node => $child,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $element_state) = @_;
    ## Only insignificant text is allowed between the children.
    if ($has_significant) {
      $self->{onerror}->(node => $text_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## The element ended after a |dt| that has no |dd|.
    if ($element_state->{phase} eq 'before dd') {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'dd',
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
2823    
## |html:pre|: phrasing content.  In addition, a |pre| element whose
## |class| attribute contains the token |idl| or |idl-code| has its text
## content handed to |onsubdoc| as a |text/x-webidl| subdocument.
$Element->{$HTML_NS}->{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## TODO: Flag to enable/disable IDL checking?
    my $class = $item->{node}->get_attribute_ns (undef, 'class');

    ## |class| is a set of space-separated tokens, so whole tokens are
    ## compared.  A word-boundary regexp would also accept hyphenated
    ## tokens such as |non-idl| or |idl-foo|, and would be applied to an
    ## undefined value when the attribute is absent.
    my $is_idl = 0;
    if (defined $class) {
      for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $class) {
        if ($token eq 'idl' or $token eq 'idl-code') {
          $is_idl = 1;
          last;
        }
      }
    }

    if ($is_idl) {
      ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
      ## NOTE: pre.code > code.idl-code: WebIDL spec
      ## NOTE: pre.idl-code: DOM1 spec
      ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
      ## NOTE: pre.schema: ReSpec-generated specs
      $self->{onsubdoc}->({s => $item->{node}->text_content,
                           container_node => $item->{node},
                           media_type => 'text/x-webidl',
                           is_char_string => 1});
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2856    
## |html:ol|: only |li| children (and insignificant text) are allowed.
$Element->{$HTML_NS}->{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    compact  => $GetHTMLBooleanAttrChecker->('compact'),
    reversed => $GetHTMLBooleanAttrChecker->('reversed'),
    start    => $HTMLIntegerAttrChecker,
    ## TODO: HTML4 |type|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align    => FEATURE_HTML2X_RFC,
    compact  => FEATURE_M12N10_REC_DEPRECATED,
    lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    reversed => FEATURE_HTML5_WD,
    sdaform  => FEATURE_HTML20_RFC,
    #start => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    start    => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type     => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_child_element => sub {
    my ($self, $item, $child, $child_ns, $child_name,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_ns}->{$child_name} and
        $IsInHTMLInteractiveContent->($child, $child_ns, $child_name)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_ns}->{$child_name}) {
      ## Explicitly allowed by an ancestor.
      #
    } elsif ($child_ns eq $HTML_NS and $child_name eq 'li') {
      ## The one element the content model permits.
      #
    } else {
      $self->{onerror}->(node => $child, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $element_state) = @_;
    ## Only insignificant text is allowed between the items.
    if ($has_significant) {
      $self->{onerror}->(node => $text_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
};
2902    
## |html:ul|: starts from a copy of the |html:ol| definition and replaces
## its status and attribute checks.
$Element->{$HTML_NS}->{ul} = {
  %{$Element->{$HTML_NS}->{ol}},
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## TODO: HTML4 |type|
    ## TODO: sdaform, align
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_HTML2X_RFC,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    type    => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
2920    
## |html:dir| (deprecated): starts from a copy of the |html:ul| definition
## and replaces its status and attribute checks.
$Element->{$HTML_NS}->{dir} = {
  ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
  %{$Element->{$HTML_NS}->{ul}},
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->({
    compact => $GetHTMLBooleanAttrChecker->('compact'),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align   => FEATURE_HTML2X_RFC,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
  }),
};
2937    
## |html:li|: flow content in general, but phrasing content while the
## checker's |in_menu| flag is set.  |value| must be an integer and is only
## meaningful when the parent element is an |html:ol|.
$Element->{$HTML_NS}->{li} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: HTML4 |type|
    value => sub {
      my ($self, $attr) = @_;

      ## Is the owner element a child of |html:ol|?
      my $in_ol;
      my $list = $attr->owner_element->manakai_parent_element;
      if (defined $list) {
        my $list_ns = $list->namespace_uri;
        $list_ns = '' unless defined $list_ns;
        my $list_name = $list->manakai_local_name;
        $in_ol = ($list_ns eq $HTML_NS and $list_name eq 'ol');
      }

      if (not $in_ol) {
        ## ISSUE: No "MUST" in the spec.
        $self->{onerror}->(node => $attr,
                           type => 'non-ol li value',
                           level => $self->{level}->{html5_fact});
      }

      ## The value itself is checked regardless of the parent.
      $HTMLIntegerAttrChecker->($self, $attr);
    },
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_HTML2X_RFC,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    type    => FEATURE_M12N10_REC_DEPRECATED,
    #value => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
    #    FEATURE_M12N10_REC_DEPRECATED,
    value   => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
        FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child, $child_ns, $child_name,
        $child_is_transparent, $element_state) = @_;
    ## TODO: In <dir> element, then ...
    my $content_checker = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
    $content_checker->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $element_state) = @_;
    ## TODO: In <dir> element, then ...
    my $content_checker = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
    $content_checker->{check_child_text}->(@_);
  },
};
2996    
## |dl| element.  Element children are expected to form groups of |dt|
## elements followed by |dd| elements; the position within a group is
## tracked in |$element_state->{phase}|, which is one of 'before dt',
## 'in dts' or 'in dds'.
$Element->{$HTML_NS}{dl} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
      type => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor; nothing to check.
    } else {
      my $phase = $element_state->{phase};
      die "check_child_element: Bad |dl| phase: $element_state->{phase}"
          unless $phase eq 'in dds' or
                 $phase eq 'in dts' or
                 $phase eq 'before dt';

      ## The transitions are the same in every phase: |dt| moves to
      ## 'in dts', |dd| moves to 'in dds', anything else is an error.
      ## The only phase-specific rule is that a |dd| seen before any
      ## |dt| is additionally reported.
      my $is_html = $child_nsuri eq $HTML_NS;
      if ($is_html and $child_ln eq 'dt') {
        $element_state->{phase} = 'in dts';
      } elsif ($is_html and $child_ln eq 'dd') {
        if ($phase eq 'before dt') {
          $self->{onerror}->(node => $child_el,
                             type => 'ps element missing',
                             text => 'dt',
                             level => $self->{level}->{must});
        }
        $element_state->{phase} = 'in dds';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only non-significant text is accepted as a direct child.
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Ending while still 'in dts' means the last |dt| run has no |dd|.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'dd',
                       level => $self->{level}->{must})
        if $element_state->{phase} eq 'in dts';

    $HTMLChecker{check_end}->(@_);
  },
};
3082    
## |dt| element: uses the shared phrasing-content checker and declares
## no element-specific attribute checkers, only attribute statuses.
$Element->{$HTML_NS}{dt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3093    
## |dd| element: uses the shared flow-content checker and declares no
## element-specific attribute checkers, only attribute statuses.
$Element->{$HTML_NS}{dd} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3104    
## |a| element.  Uses the shared transparent-content checker; while the
## element is open, interactive content is added to the set of
## disallowed ("minus") elements.
$Element->{$HTML_NS}->{a} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Lookup tables for null-namespace attributes.  Neither table
    ## depends on the attribute currently being examined, so they are
    ## built once per element instead of once per attribute.
    my $local_status = {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_WD,
      rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    };
    my $local_checker = {
      charset => sub {
        my ($self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      ## TODO: HTML4 |coords|
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      ## TODO: HTML4 |shape|
      media => $HTMLMQAttrChecker,
      ## TODO: HTML4/XHTML1 |name|
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    ## Attributes that have an |a|-specific checker, by local name.
    ## Used after the loop for the |href| dependency check.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $status = $local_status->{$attr_ln};
        $checker = $local_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      ## Fall back to the namespace-wide tables; the '' key is the
      ## per-namespace default entry.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## No error is raised here for a null-namespace attribute
        ## without a status.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    ## Remember the outer value so that |check_end| can tell whether
    ## this element is the one that set the flag.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## These attributes are not allowed without |href|.
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLTransparentChecker{check_end}->(@_);
  },
};
3227    
## |q| element.  |status| is listed before the shared phrasing-content
## checker is spread in, so a |status| key in that hash (if any) would
## take precedence.
$Element->{$HTML_NS}{q} = {
  status => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      cite => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{cite}->{type}->{cite} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
3249 wakaba 1.75 ## TODO: "Quotation punctuation (such as quotation marks), if any, must be
3250     ## placed inside the <code>q</code> element." Though we cannot test the
3251     ## element against this requirement since it includes a semantic bit,
3252     ## it might be possible to inform of the existence of quotation marks OUTSIDE
3253     ## the |q| element.
3254 wakaba 1.1
## |cite| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{cite} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3265    
## |em| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{em} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3276    
## |strong| element: shared phrasing-content checker, attribute
## statuses only.
$Element->{$HTML_NS}{strong} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3287    
## |small| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3297    
## |big| element: its element status carries no HTML5 feature flag.
## Shared phrasing-content checker, attribute statuses only.
$Element->{$HTML_NS}{big} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3307    
## |mark| element: everything except |status| comes from the shared
## phrasing-content checker, which is spread in after |status|.
$Element->{$HTML_NS}{mark} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
};
3312    
## |dfn| element.  Nested |dfn| is disallowed while the element is
## open, and the term being defined is recorded in |$self->{term}|
## (term string => array reference of defining |dfn| nodes).
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    ## Determine the defining term:
    ##   1. the |title| attribute of the |dfn| itself, if any;
    ##   2. otherwise the |title| of the only element child, if that
    ##      child is an HTML |abbr| with |title| and the |dfn| has no
    ##      text children other than inter-element whitespace;
    ##   3. otherwise the text content of the |dfn|.
    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      ## An explicit flag is needed: |defined $term| cannot tell
      ## whether an element child has been seen, because a first child
      ## that is not a titled |abbr| leaves |$term| undefined, and a
      ## later |abbr title=""| sibling would then wrongly supply the
      ## term.
      my $seen_element = 0;
      for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          if ($seen_element) {
            ## More than one element child: rule 2 does not apply.
            undef $term;
            last;
          }
          $seen_element = 1;
          if ($child->manakai_local_name eq 'abbr') {
            my $nsuri = $child->namespace_uri;
            if (defined $nsuri and $nsuri eq $HTML_NS) {
              my $attr = $child->get_attribute_node_ns (undef, 'title');
              $term = $attr->value if $attr;
            }
          }
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next if $child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/; # Inter-element whitespace
          undef $term;
          last;
        }
      }
      $term = $node->text_content unless defined $term;
    }
    push @{$self->{term}->{$term} ||= []}, $node;
    ## ISSUE: The HTML5 definition for the defined term does not work with
    ## |ruby| unless |dfn| has |title|.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3373    
## |abbr| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      full => FEATURE_XHTML2_ED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  ## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
  ## number (plural vs singular) must match the grammatical number of the
  ## contents of the element."  Though this can be checked by machine,
  ## it requires language-specific knowledge and a dictionary, so the
  ## requirement is not checked here.
  ## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
};
3390    
## |acronym| element: its element status carries no HTML5 feature flag.
## Shared phrasing-content checker, attribute statuses only.
$Element->{$HTML_NS}{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3400    
## |time| element.  The date/time string is taken from the |datetime|
## attribute when present, otherwise from the element's text content,
## and is validated when the element ends.
$Element->{$HTML_NS}{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      datetime => sub { 1 }, # the value is validated in |check_end|
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      datetime => FEATURE_HTML5_FD,
    },
  ),
  ## TODO: Update definition
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Pick the string to validate, the whitespace pattern allowed
    ## around its parts, and the node that errors are reported on.
    my $el = $item->{node};
    my $datetime_attr = $el->get_attribute_node_ns (undef, 'datetime');
    my ($input, $reg_sp, $input_node) = $datetime_attr
        ? ($datetime_attr->value, qr/[\x09\x0A\x0C\x0D\x20]*/, $datetime_attr)
        : ($el->text_content, qr/\p{WhiteSpace}*/, $el);
    ## ISSUE: What is the definition for "successfully extracts a date
    ## or time"?  If the algorithm says the string is invalid but
    ## returns some date or time, is it "successfully"?

    ## Every error below is a "must"-level error on |$input_node|.
    my $report = sub {
      $self->{onerror}->(node => $input_node,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      ## Copy the captures right away.  |$first| is the year in the
      ## date-and-time form and the hour in the time-only form.
      my ($first, $month, $day, $dt_hour, $dt_minute, $dt_second,
          $tz_hour, $tz_minute, $t_minute, $t_second)
          = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);

      my ($hour, $minute, $second);
      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        $report->('dateortime:syntax error')
            if length ($first) != 4 or
               length ($month) != 2 or
               length ($day) != 2 or
               length ($dt_hour) != 2 or
               length ($dt_minute) != 2;

        if (1 <= $month and $month <= 12) {
          ## Index 0 is a placeholder; February is given 29 days here
          ## and the leap-year rule is applied separately below.
          my @max_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
          $report->('datetime:bad day')
              if $day < 1 or $day > $max_day[$month];
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and
                 not ($first % 400 == 0 or
                      ($first % 4 == 0 and $first % 100 != 0));
        } else {
          $report->('datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          $report->('dateortime:syntax error')
              if length ($tz_hour) != 2 or length ($tz_minute) != 2;
          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        $report->('dateortime:syntax error')
            if length ($first) != 2 or length ($t_minute) != 2;

        ($hour, $minute, $second) = ($first, $t_minute, $t_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: The integer part of the seconds is not required to be
        ## two digits long, but a leading "." is reported.
        $report->('dateortime:syntax error')
            if substr ($second, 0, 1) eq '.';
        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3549    
## |meter| element.
## TODO: "The recommended way of giving the value is to include it as
## contents of the element"
## TODO: value inequalities (HTML5 revision 1463)
## TODO: content checking
## TODO: content or value must contain number (rev 2053)
$Element->{$HTML_NS}{meter} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## One floating-point checker per attribute, each created with a
      ## callback that always returns true.
      (map { ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 })) }
           qw/value min low high max optimum/),
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT) }
           qw/high low max min optimum value/),
    },
  ),
};
3573    
## |progress| element.
## TODO: recommended to use content
$Element->{$HTML_NS}{progress} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## The callbacks receive the number as their first argument:
      ## |value| must not be negative, |max| must be positive.
      value => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] >= 0 }),
      max => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      max => FEATURE_HTML5_DEFAULT,
      value => FEATURE_HTML5_DEFAULT,
    },
  ),
};
3586    
## |code| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{code} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3597    
## |var| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{var} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3608    
## |samp| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{samp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3619    
## |kbd| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{kbd} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3630    
## |sub| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{sub} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
    },
  ),
};

## |sup| shares the very same definition hash as |sub| (same reference,
## not a copy).
$Element->{$HTML_NS}{sup} = $Element->{$HTML_NS}{sub};
3643 wakaba 1.1
## |span| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML2X_RFC,
    },
  ),
};
3657    
## |i| element: shared phrasing-content checker, attribute statuses
## only.
$Element->{$HTML_NS}{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};

## |b| shares the very same definition hash as |i| (same reference, not
## a copy).
$Element->{$HTML_NS}{b} = $Element->{$HTML_NS}{i};
3670    
## |tt| element: its element status carries no HTML5 feature flag.
## Shared phrasing-content checker, attribute statuses only.
$Element->{$HTML_NS}{tt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3681 wakaba 1.51
## |s| element: element status is FEATURE_M12N10_REC_DEPRECATED.
## Shared phrasing-content checker, attribute statuses only.
$Element->{$HTML_NS}{s} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};

## |strike| and |u| share the very same definition hash as |s| (same
## reference, not copies).
$Element->{$HTML_NS}{strike} = $Element->{$HTML_NS}{s};

$Element->{$HTML_NS}{u} = $Element->{$HTML_NS}{s};
3695    
## |bdo| element: attributes are checked with the generic HTML
## attribute checker, after which the |dir| attribute is required.
$Element->{$HTML_NS}{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## The generic checker is created on each call, then applied to
    ## this element.
    my $check_common = $GetHTMLAttrsChecker->(
      {},
      {
        %HTMLAttrStatus,
        class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
        title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
        sdapref => FEATURE_HTML2X_RFC,
        sdasuff => FEATURE_HTML2X_RFC,
      },
    );
    $check_common->($self, $item, $element_state);

    my $el = $item->{node};
    $self->{onerror}->(node => $el,
                       type => 'attribute missing',
                       text => 'dir',
                       level => $self->{level}->{must})
        unless $el->has_attribute_ns (undef, 'dir');
  },
  ## ISSUE: The spec does not directly say that |dir| is an enumerated
  ## attribute.
};
3721    
3722 wakaba 1.99 $Element->{$HTML_NS}->{ruby} = {
3723     %HTMLPhrasingContentChecker,
3724     status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
3725     check_attrs => $GetHTMLAttrsChecker->({}, {
3726     %HTMLAttrStatus,
3727     %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
3728 wakaba 1.153 lang => FEATURE_HTML5_WD,
3729 wakaba 1.99 }),
3730     check_start => sub {
3731     my ($self, $item, $element_state) = @_;
3732    
3733     $element_state->{phase} = 'before-rb';
3734     #$element_state->{has_sig}
3735 wakaba 1.100
3736     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3737     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3738 wakaba 1.99 },
3739     ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
3740     check_child_element => sub {
3741     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3742     $child_is_transparent, $element_state) = @_;
3743 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
3744     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
3745 wakaba 1.99 $self->{onerror}->(node => $child_el,
3746     type => 'element not allowed:minus',
3747 wakaba 1.104 level => $self->{level}->{must});
3748 wakaba 1.99 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3749     #
3750     } elsif ($element_state->{phase} eq 'before-rb') {
3751     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3752     $element_state->{phase} = 'in-rb';
3753     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3754     $self->{onerror}->(node => $child_el,
3755 wakaba 1.104 level => $self->{level}->{should},
3756     type => 'no significant content before');
3757 wakaba 1.99 $element_state->{phase} = 'after-rt';
3758     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3759     $self->{onerror}->(node => $child_el,
3760 wakaba 1.104 level => $self->{level}->{should},
3761     type => 'no significant content before');
3762 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3763     } else {
3764     $self->{onerror}->(node => $child_el,
3765 wakaba 1.104 type => 'element not allowed:ruby base',
3766     level => $self->{level}->{must});
3767 wakaba 1.99 $element_state->{phase} = 'in-rb';
3768     }
3769     } elsif ($element_state->{phase} eq 'in-rb') {
3770     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3771     #$element_state->{phase} = 'in-rb';
3772     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3773     unless ($element_state->{has_significant}) {
3774     $self->{onerror}->(node => $child_el,
3775 wakaba 1.104 level => $self->{level}->{should},
3776     type => 'no significant content before');
3777 wakaba 1.99 }
3778     $element_state->{phase} = 'after-rt';
3779     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3780     unless ($element_state->{has_significant}) {
3781     $self->{onerror}->(node => $child_el,
3782 wakaba 1.104 level => $self->{level}->{should},
3783     type => 'no significant content before');
3784 wakaba 1.99 }
3785     $element_state->{phase} = 'after-rp1';
3786     } else {
3787     $self->{onerror}->(node => $child_el,
3788 wakaba 1.104 type => 'element not allowed:ruby base',
3789     level => $self->{level}->{must});
3790 wakaba 1.99 #$element_state->{phase} = 'in-rb';
3791     }
3792     } elsif ($element_state->{phase} eq 'after-rt') {
3793     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3794     if ($element_state->{has_significant}) {
3795     $element_state->{has_sig} = 1;
3796     delete $element_state->{has_significant};
3797     }
3798     $element_state->{phase} = 'in-rb';
3799     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3800     $self->{onerror}->(node => $child_el,
3801 wakaba 1.104 level => $self->{level}->{should},
3802     type => 'no significant content before');
3803 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3804     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3805     $self->{onerror}->(node => $child_el,
3806 wakaba 1.104 level => $self->{level}->{should},
3807     type => 'no significant content before');
3808 wakaba 1.99 #$element_state->{phase} = 'after-rt';
3809     } else {
3810     $self->{onerror}->(node => $child_el,
3811 wakaba 1.104 type => 'element not allowed:ruby base',
3812     level => $self->{level}->{must});
3813 wakaba 1.99 if ($element_state->{has_significant}) {
3814     $element_state->{has_sig} = 1;
3815     delete $element_state->{has_significant};
3816     }
3817     $element_state->{phase} = 'in-rb';
3818     }
3819     } elsif ($element_state->{phase} eq 'after-rp1') {
3820     if ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3821     $element_state->{phase} = 'after-rp-rt';
3822     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3823     $self->{onerror}->(node => $child_el,
3824 wakaba 1.104 type => 'ps element missing',
3825     text => 'rt',
3826     level => $self->{level}->{must});
3827 wakaba 1.99 $element_state->{phase} = 'after-rp2';
3828     } else {
3829     $self->{onerror}->(node => $child_el,
3830 wakaba 1.104 type => 'ps element missing',
3831     text => 'rt',
3832     level => $self->{level}->{must});
3833 wakaba 1.99 $self->{onerror}->(node => $child_el,
3834 wakaba 1.104 type => 'ps element missing',
3835     text => 'rp',
3836     level => $self->{level}->{must});
3837 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3838     $self->{onerror}->(node => $child_el,
3839 wakaba 1.104 type => 'element not allowed:ruby base',
3840     level => $self->{level}->{must});
3841 wakaba 1.99 }
3842     if ($element_state->{has_significant}) {
3843     $element_state->{has_sig} = 1;
3844     delete $element_state->{has_significant};
3845     }
3846     $element_state->{phase} = 'in-rb';
3847     }
3848     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3849     if ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3850     $element_state->{phase} = 'after-rp2';
3851     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3852     $self->{onerror}->(node => $child_el,
3853 wakaba 1.104 type => 'ps element missing',
3854     text => 'rp',
3855     level => $self->{level}->{must});
3856 wakaba 1.99 $self->{onerror}->(node => $child_el,
3857 wakaba 1.104 level => $self->{level}->{should},
3858     type => 'no significant content before');
3859 wakaba 1.99 $element_state->{phase} = 'after-rt';
3860     } else {
3861     $self->{onerror}->(node => $child_el,
3862 wakaba 1.104 type => 'ps element missing',
3863     text => 'rp',
3864     level => $self->{level}->{must});
3865 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3866     $self->{onerror}->(node => $child_el,
3867 wakaba 1.104 type => 'element not allowed:ruby base',
3868     level => $self->{level}->{must});
3869 wakaba 1.99 }
3870     if ($element_state->{has_significant}) {
3871     $element_state->{has_sig} = 1;
3872     delete $element_state->{has_significant};
3873     }
3874     $element_state->{phase} = 'in-rb';
3875     }
3876     } elsif ($element_state->{phase} eq 'after-rp2') {
3877     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3878     if ($element_state->{has_significant}) {
3879     $element_state->{has_sig} = 1;
3880     delete $element_state->{has_significant};
3881     }
3882     $element_state->{phase} = 'in-rb';
3883     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3884     $self->{onerror}->(node => $child_el,
3885 wakaba 1.104 level => $self->{level}->{should},
3886     type => 'no significant content before');
3887 wakaba 1.99 $element_state->{phase} = 'after-rt';
3888     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3889     $self->{onerror}->(node => $child_el,
3890 wakaba 1.104 level => $self->{level}->{should},
3891     type => 'no significant content before');
3892 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3893     } else {
3894     $self->{onerror}->(node => $child_el,
3895 wakaba 1.104 type => 'element not allowed:ruby base',
3896     level => $self->{level}->{must});
3897 wakaba 1.99 if ($element_state->{has_significant}) {
3898     $element_state->{has_sig} = 1;
3899     delete $element_state->{has_significant};
3900     }
3901     $element_state->{phase} = 'in-rb';
3902     }
3903     } else {
3904     die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
3905     }
3906     },
3907     check_child_text => sub {
3908     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3909     if ($has_significant) {
3910     if ($element_state->{phase} eq 'before-rb') {
3911     $element_state->{phase} = 'in-rb';
3912     } elsif ($element_state->{phase} eq 'in-rb') {
3913     #
3914     } elsif ($element_state->{phase} eq 'after-rt' or
3915     $element_state->{phase} eq 'after-rp2') {
3916     $element_state->{phase} = 'in-rb';
3917     } elsif ($element_state->{phase} eq 'after-rp1') {
3918     $self->{onerror}->(node => $child_node,
3919 wakaba 1.104 type => 'ps element missing',
3920     text => 'rt',
3921     level => $self->{level}->{must});
3922 wakaba 1.99 $self->{onerror}->(node => $child_node,
3923 wakaba 1.104 type => 'ps element missing',
3924     text => 'rp',
3925     level => $self->{level}->{must});
3926 wakaba 1.99 $element_state->{phase} = 'in-rb';
3927     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3928     $self->{onerror}->(node => $child_node,
3929 wakaba 1.104 type => 'ps element missing',
3930     text => 'rp',
3931     level => $self->{level}->{must});
3932 wakaba 1.99 $element_state->{phase} = 'in-rb';
3933     } else {
3934     die "check_child_text: Bad |ruby| phase: $element_state->{phase}";
3935     }
3936     }
3937     },
3938     check_end => sub {
3939     my ($self, $item, $element_state) = @_;
3940     $self->_remove_minus_elements ($element_state);
3941    
3942     if ($element_state->{phase} eq 'before-rb') {
3943     $self->{onerror}->(node => $item->{node},
3944 wakaba 1.104 level => $self->{level}->{should},
3945 wakaba 1.99 type => 'no significant content');
3946     $self->{onerror}->(node => $item->{node},
3947 wakaba 1.104 type => 'element missing',
3948     text => 'rt',
3949     level => $self->{level}->{must});
3950 wakaba 1.99 } elsif ($element_state->{phase} eq 'in-rb') {
3951     unless ($element_state->{has_significant}) {
3952     $self->{onerror}->(node => $item->{node},
3953 wakaba 1.104 level => $self->{level}->{should},
3954     type => 'no significant content at the end');
3955 wakaba 1.99 }
3956     $self->{onerror}->(node => $item->{node},
3957 wakaba 1.104 type => 'element missing',
3958     text => 'rt',
3959     level => $self->{level}->{must});
3960 wakaba 1.99 } elsif ($element_state->{phase} eq 'after-rt' or
3961     $element_state->{phase} eq 'after-rp2') {
3962     #
3963     } elsif ($element_state->{phase} eq 'after-rp1') {
3964     $self->{onerror}->(node => $item->{node},
3965 wakaba 1.104 type => 'element missing',
3966     text => 'rt',
3967     level => $self->{level}->{must});
3968 wakaba 1.99 $self->{onerror}->(node => $item->{node},
3969 wakaba 1.104 type => 'element missing',
3970     text => 'rp',
3971     level => $self->{level}->{must});
3972 wakaba 1.99 } elsif ($element_state->{phase} eq 'after-rp-rt') {
3973     $self->{onerror}->(node => $item->{node},
3974 wakaba 1.104 type => 'element missing',
3975     text => 'rp',
3976     level => $self->{level}->{must});
3977 wakaba 1.99 } else {
3978     die "check_child_text: Bad |ruby| phase: $element_state->{phase}";
3979     }
3980    
3981     ## NOTE: A modified version of |check_end| of %AnyChecker.
3982     if ($element_state->{has_significant} or $element_state->{has_sig}) {
3983     $item->{real_parent_state}->{has_significant} = 1;
3984     }
3985     },
3986     };
3987    
## Definition of the |rt| element.  Children are validated by the
## shared phrasing content checker.  No element-specific attribute
## checker is registered (the first argument is empty); only the
## attribute status table is supplied.
$Element->{$HTML_NS}->{rt} = do {
  my %rt_attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    ## Listed last so that it overrides any |lang| entry merged above.
    lang => FEATURE_HTML5_WD,
  );

  +{
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%rt_attr_status),
  };
};
3997    
## Definition of the |rp| element.  It is configured exactly like
## |rt|: phrasing content children, no element-specific attribute
## checkers, and the common attribute status tables with |lang|
## overridden.
$Element->{$HTML_NS}->{rp} = do {
  my $no_specific_checkers = {};
  my $rp_attr_status = {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD,
  };

  +{
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
    check_attrs => $GetHTMLAttrsChecker->($no_specific_checkers,
                                          $rp_attr_status),
  };
}; # rp
4007 wakaba 1.99
4008 wakaba 1.29 =pod
4009    
4010     ## TODO:
4011    
4012     +
4013     + <p>Partly because of the confusion described above, authors are
4014     + strongly recommended to always mark up all paragraphs with the
4015     + <code>p</code> element, and to not have any <code>ins</code> or
4016     + <code>del</code> elements that cross across any <span
4017     + title="paragraph">implied paragraphs</span>.</p>
4018     +
4019     (An informative note)
4020    
4021     <p><code>ins</code> elements should not cross <span
4022     + title="paragraph">implied paragraph</span> boundaries.</p>
4023     (normative)
4024    
4025     + <p><code>del</code> elements should not cross <span
4026     + title="paragraph">implied paragraph</span> boundaries.</p>
4027     (normative)
4028    
4029     =cut
4030    
## Definition of the |ins| element: a transparent element whose
## |cite| attribute is checked as a URI and whose |datetime| attribute
## is checked as a global date and time string.
$Element->{$HTML_NS}->{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    ## Attribute status table (later entries override the merged ones)
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  ## Records how URI-valued attributes of this element are classified.
  check_start => sub {
    my $element_state = $_[2]; # ($self, $item, $element_state)

    $element_state->{uri_info}{cite}{type}{cite} = 1;
    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },
};
4052    
## Definition of the |del| element.  Attributes are handled as for
## |ins|, but |check_end| is replaced so that the "has significant
## content" flag is not propagated to the parent.
$Element->{$HTML_NS}->{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    ## Attribute status table (later entries override the merged ones)
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  ## Records how URI-valued attributes of this element are classified.
  check_start => sub {
    my $element_state = $_[2]; # ($self, $item, $element_state)

    $element_state->{uri_info}{cite}{type}{cite} = 1;
    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },
  ## Reports an element that is neither significant nor used
  ## transparently.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate, so there is
    ## nothing to do when the element has significant content.  An
    ## item marked as transparent is not reported either.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
};
4086    
## Definition of the |figure| element.
##
## State flags kept in |$element_state| by the callbacks below:
##   has_legend_at_first - a |legend| was seen before any other content
##   has_legend          - the |legend| node seen after other content
##   has_non_legend      - non-|legend| content seen since the last |legend|
##   has_table           - number of |table| children
##   has_non_table       - set or counted when non-|table| content is seen
$Element->{$HTML_NS}->{figure} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  ## NOTE: legend, Flow | Flow, legend?
  check_child_element => sub {
    ## NOTE: |@_| is forwarded untouched to the generic flow content
    ## checker below, so it must not be shifted or modified.
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      @$element_state{qw(has_non_legend has_non_table)} = (1, 1);
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_table} = 1;
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## Reports a misplaced |legend| on the given node.
      my $report_legend = sub {
        $self->{onerror}->(node => $_[0],
                           type => 'element not allowed:figure legend',
                           level => $self->{level}->{must});
      };

      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| exists; any later one is an error.
        $report_legend->($child_el);
      } else {
        my $earlier_legend = $element_state->{has_legend};
        ## A second trailing |legend|: the earlier one is reported
        ## and this one becomes the candidate.
        $report_legend->($earlier_legend) if $earlier_legend;

        if ($earlier_legend or $element_state->{has_non_legend}) {
          $element_state->{has_legend} = $child_el;
        } else {
          $element_state->{has_legend_at_first} = 1;
        }
      }
      delete $element_state->{has_non_legend};
    } else {
      my $is_table = ($child_nsuri eq $HTML_NS and $child_ln eq 'table');
      $element_state->{$is_table ? 'has_table' : 'has_non_table'}++;

      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    if ($has_significant) {
      $element_state->{has_non_table}++;
      $element_state->{has_non_legend} = 1;
    }

    $element_state->{in_figure} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |legend| followed by further content is misplaced.
    if (not $element_state->{has_legend_at_first} and
        $element_state->{has_legend} and
        $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    }

    ## When the only content is exactly one |table|, a recorded
    ## |table_caption_element| is reported.
    my $table_count = $element_state->{has_table} || 0;
    if ($table_count == 1 and
        not $element_state->{has_non_table} and
        $element_state->{table_caption_element}) {
      $self->{onerror}->(node => $element_state->{table_caption_element},
                         type => 'element not allowed',
                         level => $self->{level}->{should});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
4163 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
4164 wakaba 1.1
## Attribute checker for attributes whose validation is not
## implemented: it reports the attribute as unknown at the
## "uncertain" level.
my $AttrCheckerNotImplemented = sub {
  my ($checker, $attr_node) = @_;
  my $report = $checker->{onerror};
  $report->(node => $attr_node,
            type => 'unknown attribute',
            level => $checker->{level}->{uncertain});
};
4171    
## Definition of the |img| element: an empty element whose attributes
## are validated on each call of |check_attrs|, followed by checks for
## the |alt| and |src| attributes.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Attribute value checkers
    my %checker_for = (
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      alt => sub { }, ## NOTE: No syntactical requirement
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        ## |ismap| is reported unless the |in_a_href| flag is set.
        unless ($self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{level}->{must});
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      longdesc => $HTMLURIAttrChecker,
      ## TODO: HTML4 |name|
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    );

    ## Attribute status table (later entries override the merged ones)
    my %status_for = (
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      src => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    );

    $GetHTMLAttrsChecker->(\%checker_for, \%status_for)
        ->($self, $item, $element_state);

    ## A missing |alt| is reported at the "should" level and a
    ## missing |src| at the "must" level, in this order.
    ## TODO: ... (left open by the original author for |alt|)
    for my $expected ([alt => 'should'], [src => 'must']) {
      my ($attr_name, $level_name) = @$expected;
      next if $item->{node}->has_attribute_ns (undef, $attr_name);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $attr_name,
                         level => $self->{level}->{$level_name});
    }

    ## TODO: external resource check

    $element_state->{uri_info}{src}{type}{embedded} = 1;
    $element_state->{uri_info}{lowsrc}{type}{embedded} = 1;
    $element_state->{uri_info}{dynsrc}{type}{embedded} = 1;
    $element_state->{uri_info}{longdesc}{type}{cite} = 1;
  },
};
4240    
## Definition of the |iframe| element.  Children are validated by the
## shared text checker.
##
## NOTE: The boolean attribute defined by HTML5 is spelled |seamless|.
## It was previously registered here under the misspelling
## |seemless|, so the real attribute was treated as unknown while the
## misspelled one was accepted.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    ## Attribute value checkers
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    name => $HTMLBrowsingContextNameAttrChecker,
    sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
      'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
    }),
    seamless => $GetHTMLBooleanAttrChecker->('seamless'),
    src => $HTMLURIAttrChecker,
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    ## Attribute status table (later entries override the merged ones)
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sandbox => FEATURE_HTML5_WD,
    scrolling => FEATURE_M12N10_REC,
    seamless => FEATURE_HTML5_WD,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  ## Records how URI-valued attributes of this element are classified.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
4282    
## Definition of the |embed| element.  Unlike most elements it does
## not use |$GetHTMLAttrsChecker|: any null-namespace attribute whose
## name is a lowercase NCName not starting with "xml" is accepted, so
## each attribute is dispatched by hand.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status table for this element, looked up by local name only
    ## (the namespace is not taken into account).
    my %embed_attr_status = (
      %HTMLAttrStatus,
      height => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_WD,
      type => FEATURE_HTML5_WD,
      width => FEATURE_HTML5_LC,
    );

    my $src_seen;
    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' if not defined $ns;
      my $ln = $attr->manakai_local_name;

      my $check;
      my $status = $embed_attr_status{$ln};

      if ($ns eq '') {
        if ($ln eq 'src') {
          $src_seen = 1;
          $check = $HTMLURIAttrChecker;
        } elsif ($ln eq 'type') {
          $check = $HTMLIMTAttrChecker;
        } elsif ($ln eq 'width' or $ln eq 'height') {
          $check = $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 });
        } elsif ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $ln !~ /[A-Z]/) {
          ($check, $status) = ($HTMLDatasetAttrChecker,
                               $HTMLDatasetAttrStatus);
        } elsif ($ln !~ /^[Xx][Mm][Ll]/ and
                 $ln !~ /[A-Z]/ and
                 $ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
          ## NOTE: Any local attribute is ok.
          $check = $HTMLAttrChecker->{$ln} || sub { };
          $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
        } else {
          $check = $HTMLAttrChecker->{$ln};
        }
      }

      ## Fall back to the per-namespace tables; the '' entry is the
      ## default for the namespace.
      $check ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      if (not defined $status and length $ns) {
        $status = FEATURE_ALLOWED;
      }

      if ($check) {
        $check->($self, $attr, $item, $element_state);
      } elsif ($ns ne '' or $status) {
        ## A null-namespace attribute with neither checker nor status
        ## is not reported here.
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    if (not $src_seen) {
      ## NOTE: <embed> without src="" is allowed since revision 1929.
      ## An informational message is issued since <embed> w/o src=""
      ## is likely an authoring error.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{info});
    }

    ## TODO: external resource check

    $element_state->{uri_info}{src}{type}{embedded} = 1;
  },
};
4360    
4361 wakaba 1.49 ## TODO:
4362     ## {applet} FEATURE_M12N10_REC_DEPRECATED
4363     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4364    
## Definition of the |object| element.
##
## Content model noted by the original author: param*, transparent
## (Flow).  The |has_non_param| flag in |$element_state| records that
## something other than |param| has been seen, so that a later
## |param| child can be reported.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  ## Validates the attributes, then requires at least one of |data|
  ## and |type|, and records how URI-valued attributes are classified.
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      archive => $HTMLSpaceURIsAttrChecker,
          ## TODO: Relative to @codebase
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      classid => $HTMLURIAttrChecker,
      codebase => $HTMLURIAttrChecker,
      codetype => $HTMLIMTAttrChecker,
          ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
      data => $HTMLURIAttrChecker,
      declare => $GetHTMLBooleanAttrChecker->('declare'),
          ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
          ## [HTML4] but we don't know how to test this.
      form => $HTMLFormAttrChecker,
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name => $HTMLBrowsingContextNameAttrChecker,
          ## NOTE: |name| attribute of the |object| element defines
          ## the name of the browsing context created by the element,
          ## if any, but is also used as the form control name of the
          ## form control provided by the plugin, if any.
      standby => sub {}, ## NOTE: %Text; in HTML4
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    }, {
      ## Attribute status table (later entries override the merged ones)
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      'content-length' => FEATURE_XHTML2_ED,
      data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      form => FEATURE_HTML5_DEFAULT,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## At least one of |data| and |type| has to be specified.
    unless ($item->{node}->has_attribute_ns (undef, 'data') or
            $item->{node}->has_attribute_ns (undef, 'type')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:data|type',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  ## NOTE: param*, transparent (Flow)
  check_child_element => sub {
    ## NOTE: |@_| is forwarded untouched to the generic flow content
    ## checker below, so it must not be shifted or modified.
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      ## A disallowed element still counts as non-|param| content, so
      ## that a |param| following it is reported.  (This used to set
      ## |has_non_legend|, a flag that is only read by the |figure|
      ## checker.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| elements have to precede any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  ## Significant text counts as non-|param| content.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  ## Propagates significance to the real parent; an |object| without
  ## significant content is reported only when it has no parent
  ## element.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
4482 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4483     ## What about |<section><object data><style scoped></style>x</object></section>|?
4484     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4485 wakaba 1.1
## |param| element: empty content; the |name| and |value| attributes
## are both required.
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Per-attribute value checkers.  |name| and |value| accept any
    ## string.
    my %checker_for = (
      name => sub { },
      type => $HTMLIMTAttrChecker,
      value => sub { },
      valuetype => $GetHTMLEnumeratedAttrChecker->({
        data => 1, ref => 1, object => 1,
      }),
    );

    ## Per-attribute feature status.
    my %status_of = (
      %HTMLAttrStatus,
      href => FEATURE_RDFA_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    );

    $GetHTMLAttrsChecker->(\%checker_for, \%status_of)->(@_);

    ## Report each required attribute that is missing, |name| first.
    for my $required (qw/name value/) {
      next if $item->{node}->has_attribute_ns (undef, $required);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $required,
                         level => $self->{level}->{must});
    }
  },
};
4521    
## |video| element checker.  Child content as enforced below: |source|
## children are accepted only while |allow_source| is set (no |src|
## attribute, and no other child or significant text seen yet); any
## other child is checked as flow content.  The |audio| checker copies
## all of these callbacks.
4522     $Element->{$HTML_NS}->{video} = {
4523 wakaba 1.40 %HTMLTransparentChecker,
4524 wakaba 1.48 status => FEATURE_HTML5_LC,
4525 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4526 wakaba 1.1 src => $HTMLURIAttrChecker,
4527     ## TODO: start, loopstart, loopend, end
4528     ## ISSUE: they MUST be "value time offset"s. Value?
4529 wakaba 1.11 ## ISSUE: playcount has no conformance criteria
4530 wakaba 1.183 autobuffer => $GetHTMLBooleanAttrChecker->('autobuffer'),
4531 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
4532     controls => $GetHTMLBooleanAttrChecker->('controls'),
4533 wakaba 1.59 poster => $HTMLURIAttrChecker,
4534 wakaba 1.178 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4535     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4536 wakaba 1.50 }, {
4537     %HTMLAttrStatus,
4538 wakaba 1.183 autobuffer => FEATURE_HTML5_LC,
4539 wakaba 1.50 autoplay => FEATURE_HTML5_LC,
4540     controls => FEATURE_HTML5_LC,
4541 wakaba 1.153 end => FEATURE_HTML5_AT_RISK,
4542 wakaba 1.50 height => FEATURE_HTML5_LC,
4543 wakaba 1.153 loopend => FEATURE_HTML5_AT_RISK,
4544     loopstart => FEATURE_HTML5_AT_RISK,
4545     playcount => FEATURE_HTML5_AT_RISK,
4546 wakaba 1.50 poster => FEATURE_HTML5_LC,
4547     src => FEATURE_HTML5_LC,
4548 wakaba 1.153 start => FEATURE_HTML5_AT_RISK,
4549 wakaba 1.50 width => FEATURE_HTML5_LC,
4550 wakaba 1.1 }),
4551 wakaba 1.42 check_start => sub {
4552     my ($self, $item, $element_state) = @_;
## |allow_source| is true only when the element has no |src| attribute.
4553     $element_state->{allow_source}
4554     = not $item->{node}->has_attribute_ns (undef, 'src');
## Unless |has_source| is already true, it becomes -1 when |source|
## children are allowed and a false value otherwise.  |check_end|
## treats -1 as "a |source| child was needed but none was seen".
4555     $element_state->{has_source} ||= $element_state->{allow_source} * -1;
4556     ## NOTE: It might be set true by |check_element|.
4557 wakaba 1.66
4558     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4559     $element_state->{uri_info}->{poster}->{type}->{embedded} = 1;
4560 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4561     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4562 wakaba 1.42 },
4563     check_child_element => sub {
4564     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4565     $child_is_transparent, $element_state) = @_;
4566 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
4567     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
4568 wakaba 1.42 $self->{onerror}->(node => $child_el,
4569     type => 'element not allowed:minus',
4570 wakaba 1.104 level => $self->{level}->{must});
## An excluded child also ends the run in which |source| is accepted.
4571 wakaba 1.42 delete $element_state->{allow_source};
4572     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4573     #
4574     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
4575 wakaba 1.45 unless ($element_state->{allow_source}) {
4576 wakaba 1.104 $self->{onerror}->(node => $child_el,
4577 wakaba 1.72 type => 'element not allowed:flow',
4578 wakaba 1.104 level => $self->{level}->{must});
4579 wakaba 1.42 }
## Recorded even when the |source| child was misplaced, so that
## |check_end| does not additionally report a missing |source|.
4580 wakaba 1.45 $element_state->{has_source} = 1;
4581 wakaba 1.1 } else {
4582 wakaba 1.42 delete $element_state->{allow_source};
4583 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
4584 wakaba 1.42 }
4585     },
4586     check_child_text => sub {
4587     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
## Significant text ends the run in which |source| is accepted.
4588     if ($has_significant) {
4589     delete $element_state->{allow_source};
4590     }
4591 wakaba 1.72 $HTMLFlowContentChecker{check_child_text}->(@_);
4592 wakaba 1.42 },
4593     check_end => sub {
4594     my ($self, $item, $element_state) = @_;
## -1 is the marker left by |check_start| when there was no |src|
## attribute and no |source| child has been seen since.
4595     if ($element_state->{has_source} == -1) {
4596     $self->{onerror}->(node => $item->{node},
4597 wakaba 1.104 type => 'child element missing',
4598     text => 'source',
4599     level => $self->{level}->{must});
4600 wakaba 1.1 }
4601 wakaba 1.42
## The remaining end-of-element handling is shared with |object|.
4602     $Element->{$HTML_NS}->{object}->{check_end}->(@_);
4603 wakaba 1.1 },
4604     };
4605    
## |audio| element checker.  Every callback is copied from the |video|
## checker; only |status| and the attribute tables are replaced.  The
## attribute tables have no |poster|, |height| or |width| entries.
$Element->{$HTML_NS}->{audio} = {
  %{$Element->{$HTML_NS}->{video}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s. Value?
    ## ISSUE: playcount has no conformance criteria
    (map { ($_ => $GetHTMLBooleanAttrChecker->($_)) }
         qw/autobuffer autoplay controls/),
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) }
         qw/autobuffer autoplay controls src/),
    (map { ($_ => FEATURE_HTML5_AT_RISK) }
         qw/end loopend loopstart playcount start/),
  }),
};
4630    
## |source| element: empty content; the |src| attribute is required.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Per-attribute value checkers.
    my %checker_for = (
      media => $HTMLMQAttrChecker,
      ## ISSUE: Negative or zero pixelratio=""
      pixelratio => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
    );

    ## Per-attribute feature status; all four share one status.
    my %status_of = (
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_LC) } qw/media pixelratio src type/),
    );

    $GetHTMLAttrsChecker->(\%checker_for, \%status_of)->(@_);

    my $node = $item->{node};
    unless ($node->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4658    
## |canvas| element: transparent content model; |height| and |width|
## are non-negative integers with no further restriction.
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_COMPLETE,
  check_attrs => $GetHTMLAttrsChecker->({
    (map { ($_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })) }
         qw/height width/),
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_COMPLETE) } qw/height width/),
  }),

  # Authors MUST provide alternative content (HTML5 revision 2868) -
  # This requirement cannot be checked, since the alternative content
  # might be placed outside of the element.
}; # canvas
4675 wakaba 1.1
## |map| element checker: flow content.  The |name| attribute is
## required and must not be empty; when an |id| attribute is also
## present its value has to equal the |name| value.  While the element
## is open, a hash is kept on the |$self->{flag}->{in_map}| stack in
## which the |area| checker records the |alt| status per |href| value.
4676     $Element->{$HTML_NS}->{map} = {
4677 wakaba 1.72 %HTMLFlowContentChecker,
4678 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4679 wakaba 1.40 check_attrs => sub {
4680     my ($self, $item, $element_state) = @_;
## Set by the |name| checker below to [value, attribute node]; stays
## undefined when the element has no |name| attribute.
4681 wakaba 1.100 my $has_name;
4682 wakaba 1.4 $GetHTMLAttrsChecker->({
4683 wakaba 1.100 name => sub {
4684     my ($self, $attr) = @_;
4685     my $value = $attr->value;
4686     if (length $value) {
4687     ## NOTE: Duplication is not non-conforming.
4688     ## NOTE: Space characters are not non-conforming.
4689     #
4690     } else {
4691     $self->{onerror}->(node => $attr,
4692     type => 'empty attribute value',
4693 wakaba 1.104 level => $self->{level}->{must});
4694 wakaba 1.100 }
## Only the first |name| attribute seen for a given value is kept in
## |$self->{map}|.  This runs for an empty value as well.
4695 wakaba 1.4 $self->{map}->{$value} ||= $attr;
4696 wakaba 1.100 $has_name = [$value, $attr];
4697 wakaba 1.4 },
4698 wakaba 1.49 }, {
4699     %HTMLAttrStatus,
4700 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4701     dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4702     id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4703     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
4704     #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
4705     name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4706 wakaba 1.50 onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4707     ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4708     onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4709     onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4710     onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4711     onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4712     onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4713     onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4714     onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4715     onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4716 wakaba 1.153 title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4717 wakaba 1.66 })->(@_);
4718 wakaba 1.100
## Cross-attribute checks: |id| (if any) must match |name|, and |name|
## itself is required.
4719 wakaba 1.135 if ($has_name) {
4720 wakaba 1.145 my $id = $item->{node}->get_attribute_ns (undef, 'id');
4721 wakaba 1.135 if (defined $id and $has_name->[0] ne $id) {
4722 wakaba 1.155 $self->{onerror}
4723     ->(node => $item->{node}->get_attribute_node_ns (undef, 'id'),
4724     type => 'id ne name',
4725     level => $self->{level}->{must});
4726 wakaba 1.100 }
4727 wakaba 1.135 } else {
4728 wakaba 1.100 $self->{onerror}->(node => $item->{node},
4729 wakaba 1.104 type => 'attribute missing',
4730     text => 'name',
4731     level => $self->{level}->{must});
4732 wakaba 1.100 }
4733 wakaba 1.4 },
4734 wakaba 1.59 check_start => sub {
4735     my ($self, $item, $element_state) = @_;
## The previous stack is saved so that |check_end| can restore it; the
## new stack is a copy with one fresh hash appended for this element.
4736     $element_state->{in_map_original} = $self->{flag}->{in_map};
4737 wakaba 1.137 $self->{flag}->{in_map} = [@{$self->{flag}->{in_map} or []}, {}];
4738     ## NOTE: |{in_map}| is a reference to the array which contains
4739     ## hash references. Hashes are corresponding to the opening
4740     ## |map| elements and each of them contains the key-value
4741     ## pairs corresponding to the absolute URLs for the processed
4742     ## |area| elements in the |map| element corresponding to the
4743     ## hash. The key represents the resource (## TODO: use
4744     ## absolute URL), while the value represents whether there is
4745     ## an |area| element whose |alt| attribute is specified to a
4746     ## non-empty value. If there IS such an |area| element for
4747     ## the resource specified by the key, then the value is set to
4748     ## zero (|0|). Otherwise, if there is no such an |area|
4749     ## element but there is any |area| element with the empty
4750     ## |alt=""| attribute, then the value contains an array
4751     ## reference that contains all of such |area| elements.
4752 wakaba 1.79
4753     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4754     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4755 wakaba 1.59 },
4756     check_end => sub {
4757     my ($self, $item, $element_state) = @_;
4758 wakaba 1.137
## Report every empty |alt=""| attribute node still recorded in this
## element's own hash.  Entries set to 0 are skipped.
4759     for (keys %{$self->{flag}->{in_map}->[-1]}) {
4760     my $nodes = $self->{flag}->{in_map}->[-1]->{$_};
4761     next unless $nodes;
4762     for (@$nodes) {
4763     $self->{onerror}->(type => 'empty area alt',
4764     node => $_,
4765     level => $self->{level}->{html5_no_may});
4766     }
4767     }
4768    
## Restore the stack saved by |check_start|.
4769     $self->{flag}->{in_map} = $element_state->{in_map_original};
4770    
4771 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
4772 wakaba 1.59 },
4773 wakaba 1.1 };
4774    
## |area| element checker: empty content.  |check_attrs| walks the
## attributes by hand, keeping the attribute nodes in |%attr| and the
## parsed coordinate list in |$coords| for the cross-attribute checks
## that follow the loop (|href| versus |alt| and the other hyperlink
## attributes, then |shape| versus |coords|).
4775     $Element->{$HTML_NS}->{area} = {
4776 wakaba 1.40 %HTMLEmptyChecker,
4777 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4778 wakaba 1.40 check_attrs => sub {
4779     my ($self, $item, $element_state) = @_;
## |%attr|: local name => attribute node, filled only for attributes
## that have an entry in the element-specific checker table below.
## |$coords|: array reference of the comma-separated |coords| values;
## left undefined when |coords| is absent or has a syntax error.
4780 wakaba 1.1 my %attr;
4781     my $coords;
4782 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
4783 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
4784     $attr_ns = '' unless defined $attr_ns;
4785     my $attr_ln = $attr->manakai_local_name;
4786     my $checker;
4787 wakaba 1.73 my $status;
4788 wakaba 1.1 if ($attr_ns eq '') {
4789 wakaba 1.73 $status = {
4790     %HTMLAttrStatus,
4791     %HTMLM12NCommonAttrStatus,
4792 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
4793 wakaba 1.153 alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4794     coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4795 wakaba 1.154 href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
4796 wakaba 1.153 hreflang => FEATURE_HTML5_WD,
4797     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
4798 wakaba 1.154 media => FEATURE_HTML5_WD,
4799 wakaba 1.73 nohref => FEATURE_M12N10_REC,
4800     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4801     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4802 wakaba 1.153 ping => FEATURE_HTML5_WD,
4803 wakaba 1.154 rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
4804 wakaba 1.153 shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4805 wakaba 1.73 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4806 wakaba 1.153 target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4807     type => FEATURE_HTML5_WD,
4808 wakaba 1.73 }->{$attr_ln};
4809    
4810 wakaba 1.1 $checker = {
4811 wakaba 1.153 alt => sub {
4812     ## NOTE: Checked later.
4813     },
4814 wakaba 1.1 shape => $GetHTMLEnumeratedAttrChecker->({
4815     circ => -1, circle => 1,
4816     default => 1,
4817     poly => 1, polygon => -1,
4818     rect => 1, rectangle => -1,
4819     }),
4820     coords => sub {
4821     my ($self, $attr) = @_;
4822     my $value = $attr->value;
## Syntax: one or more optionally negative decimal integers separated
## by single commas, with no white space.
4823     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
4824     $coords = [split /,/, $value];
4825     } else {
4826     $self->{onerror}->(node => $attr,
4827 wakaba 1.104 type => 'coords:syntax error',
4828     level => $self->{level}->{must});
4829 wakaba 1.1 }
4830     },
4831 wakaba 1.70 nohref => $GetHTMLBooleanAttrChecker->('nohref'),
4832     target => $HTMLTargetAttrChecker,
4833 wakaba 1.1 href => $HTMLURIAttrChecker,
4834     ping => $HTMLSpaceURIsAttrChecker,
4835 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
4836 wakaba 1.1 media => $HTMLMQAttrChecker,
4837     hreflang => $HTMLLanguageTagAttrChecker,
4838     type => $HTMLIMTAttrChecker,
4839     }->{$attr_ln};
4840     if ($checker) {
4841     $attr{$attr_ln} = $attr;
4842 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
4843     $attr_ln !~ /[A-Z]/) {
4844 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
4845     $status = $HTMLDatasetAttrStatus;
4846 wakaba 1.1 } else {
4847     $checker = $HTMLAttrChecker->{$attr_ln};
4848     }
4849     }
## Fall back to the per-namespace tables, then to their '' wildcard
## entries.
4850     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
4851 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
4852     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
4853     || $AttrStatus->{$attr_ns}->{''};
4854     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
4855 wakaba 1.62
4856 wakaba 1.1 if ($checker) {
4857 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
4858 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
4859 wakaba 1.54 #
4860 wakaba 1.1 } else {
4861 wakaba 1.104 $self->{onerror}->(node => $attr,
4862     type => 'unknown attribute',
4863     level => $self->{level}->{uncertain});
4864 wakaba 1.1 ## ISSUE: No conformance criteria for unknown attributes in the spec
4865     }
4866 wakaba 1.49
4867 wakaba 1.82 $self->_attr_status_info ($attr, $status);
4868 wakaba 1.1 }
4869    
## With |href|: |alt| is required and its emptiness is recorded per
## URL in every open |map|.  Without |href|: the hyperlink-related
## attributes and |alt| are not allowed.
4870     if (defined $attr{href}) {
4871 wakaba 1.4 $self->{has_hyperlink_element} = 1;
4872 wakaba 1.137 if (defined $attr{alt}) {
4873     my $url = $attr{href}->value; ## TODO: resolve
4874     if (length $attr{alt}->value) {
4875     for (@{$self->{flag}->{in_map} or []}) {
4876     $_->{$url} = 0;
4877     }
4878     } else {
4879     ## NOTE: Empty |alt=""|. If there is another |area| element
4880     ## with the same |href=""| and that |area| element's
4881     ## |alt=""| attribute is not an empty string, then this
4882     ## is conforming.
4883     for (@{$self->{flag}->{in_map} or []}) {
4884     push @{$_->{$url} ||= []}, $attr{alt}
4885     unless exists $_->{$url} and not $_->{$url};
4886     }
4887     }
4888     } else {
4889 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4890 wakaba 1.104 type => 'attribute missing',
4891     text => 'alt',
4892     level => $self->{level}->{must});
4893 wakaba 1.1 }
4894     } else {
4895     for (qw/target ping rel media hreflang type alt/) {
4896     if (defined $attr{$_}) {
4897     $self->{onerror}->(node => $attr{$_},
4898 wakaba 1.104 type => 'attribute not allowed',
4899     level => $self->{level}->{must});
4900 wakaba 1.1 }
4901     }
4902     }
4903    
## Normalize |shape| to one of circle/default/polygon/rectangle.  A
## missing or unrecognized value is treated as 'rectangle'.
4904     my $shape = 'rectangle';
4905     if (defined $attr{shape}) {
4906     $shape = {
4907     circ => 'circle', circle => 'circle',
4908     default => 'default',
4909     poly => 'polygon', polygon => 'polygon',
4910     rect => 'rectangle', rectangle => 'rectangle',
4911     }->{lc $attr{shape}->value} || 'rectangle';
4912     ## TODO: ASCII lowercase?
4913     }
4914    
## circle: exactly three numbers, the third one not negative.
4915     if ($shape eq 'circle') {
4916     if (defined $attr{coords}) {
4917     if (defined $coords) {
4918     if (@$coords == 3) {
4919     if ($coords->[2] < 0) {
4920     $self->{onerror}->(node => $attr{coords},
4921 wakaba 1.104 type => 'coords:out of range',
4922     index => 2,
4923     value => $coords->[2],
4924     level => $self->{level}->{must});
4925 wakaba 1.1 }
4926     } else {
4927     $self->{onerror}->(node => $attr{coords},
4928 wakaba 1.104 type => 'coords:number not 3',
4929     text => 0+@$coords,
4930     level => $self->{level}->{must});
4931 wakaba 1.1 }
4932     } else {
4933     ## NOTE: A syntax error has been reported.
4934     }
4935     } else {
4936 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4937 wakaba 1.104 type => 'attribute missing',
4938     text => 'coords',
4939     level => $self->{level}->{must});
4940 wakaba 1.1 }
## default: |coords| must not be specified.
4941     } elsif ($shape eq 'default') {
4942     if (defined $attr{coords}) {
4943     $self->{onerror}->(node => $attr{coords},
4944 wakaba 1.104 type => 'attribute not allowed',
4945     level => $self->{level}->{must});
4946 wakaba 1.1 }
## polygon: at least six numbers, and an even number of them.
4947     } elsif ($shape eq 'polygon') {
4948     if (defined $attr{coords}) {
4949     if (defined $coords) {
4950     if (@$coords >= 6) {
4951     unless (@$coords % 2 == 0) {
4952     $self->{onerror}->(node => $attr{coords},
4953 wakaba 1.104 type => 'coords:number not even',
4954     text => 0+@$coords,
4955     level => $self->{level}->{must});
4956 wakaba 1.1 }
4957     } else {
4958     $self->{onerror}->(node => $attr{coords},
4959 wakaba 1.104 type => 'coords:number lt 6',
4960     text => 0+@$coords,
4961     level => $self->{level}->{must});
4962 wakaba 1.1 }
4963     } else {
4964     ## NOTE: A syntax error has been reported.
4965     }
4966     } else {
4967 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4968 wakaba 1.104 type => 'attribute missing',
4969     text => 'coords',
4970     level => $self->{level}->{must});
4971 wakaba 1.1 }
## rectangle: exactly four numbers, with the first less than the third
## and the second less than the fourth.
4972     } elsif ($shape eq 'rectangle') {
4973     if (defined $attr{coords}) {
4974     if (defined $coords) {
4975     if (@$coords == 4) {
4976     unless ($coords->[0] < $coords->[2]) {
4977     $self->{onerror}->(node => $attr{coords},
4978 wakaba 1.104 type => 'coords:out of range',
4979     index => 0,
4980     value => $coords->[0],
4981     level => $self->{level}->{must});
4982 wakaba 1.1 }
4983     unless ($coords->[1] < $coords->[3]) {
4984     $self->{onerror}->(node => $attr{coords},
4985 wakaba 1.104 type => 'coords:out of range',
4986     index => 1,
4987     value => $coords->[1],
4988     level => $self->{level}->{must});
4989 wakaba 1.1 }
4990     } else {
4991     $self->{onerror}->(node => $attr{coords},
4992 wakaba 1.104 type => 'coords:number not 4',
4993     text => 0+@$coords,
4994     level => $self->{level}->{must});
4995 wakaba 1.1 }
4996     } else {
4997     ## NOTE: A syntax error has been reported.
4998     }
4999     } else {
5000 wakaba 1.40 $self->{onerror}->(node => $item->{node},
5001 wakaba 1.104 type => 'attribute missing',
5002     text => 'coords',
5003     level => $self->{level}->{must});
5004 wakaba 1.1 }
5005     }
5006 wakaba 1.66
5007     $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
5008 wakaba 1.1 },
5009 wakaba 1.59 check_start => sub {
5010     my ($self, $item, $element_state) = @_;
## An |area| that has a parent element is an error unless some |map|
## element is currently open.
5011     unless ($self->{flag}->{in_map} or
5012     not $item->{node}->manakai_parent_element) {
5013     $self->{onerror}->(node => $item->{node},
5014     type => 'element not allowed:area',
5015 wakaba 1.104 level => $self->{level}->{must});
5016 wakaba 1.59 }
5017 wakaba 1.79
5018     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
5019     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
5020 wakaba 1.59 },
5021 wakaba 1.1 };
5022    
## |table| element checker.  Child order is enforced by a small state
## machine kept in |$element_state->{phase}|:
##
##   'before caption' -> 'in colgroup' -> 'after thead'
##     -> 'in tbodys' or 'in trs' -> 'after tfoot'
##
## A |tfoot| seen before the body rows sets |has_tfoot| and moves to
## 'in tbodys'; a trailing |tfoot| is accepted only when |has_tfoot|
## is not yet set.  Significant text is never allowed.  |check_end|
## additionally runs the table model checks of Whatpm::HTMLTable.
5023     $Element->{$HTML_NS}->{table} = {
5024 wakaba 1.40 %HTMLChecker,
5025 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
5026 wakaba 1.69 check_attrs => $GetHTMLAttrsChecker->({
5027 wakaba 1.86 cellpadding => $HTMLLengthAttrChecker,
5028     cellspacing => $HTMLLengthAttrChecker,
5029 wakaba 1.69 frame => $GetHTMLEnumeratedAttrChecker->({
5030     void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
5031     lhs => 1, rhs => 1, box => 1, border => 1,
5032     }),
5033     rules => $GetHTMLEnumeratedAttrChecker->({
5034     none => 1, groups => 1, rows => 1, cols => 1, all => 1,
5035     }),
5036     summary => sub {}, ## NOTE: %Text; in HTML4.
5037     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
5038     }, {
5039 wakaba 1.49 %HTMLAttrStatus,
5040 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
5041 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
5042     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
5043     border => FEATURE_M12N10_REC,
5044     cellpadding => FEATURE_M12N10_REC,
5045     cellspacing => FEATURE_M12N10_REC,
5046 wakaba 1.61 cols => FEATURE_RFC1942,
5047 wakaba 1.49 datafld => FEATURE_HTML4_REC_RESERVED,
5048     dataformatas => FEATURE_HTML4_REC_RESERVED,
5049     datapagesize => FEATURE_M12N10_REC,
5050     datasrc => FEATURE_HTML4_REC_RESERVED,
5051     frame => FEATURE_M12N10_REC,
5052 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5053 wakaba 1.49 rules => FEATURE_M12N10_REC,
5054     summary => FEATURE_M12N10_REC,
5055     width => FEATURE_M12N10_REC,
5056     }),
5057 wakaba 1.40 check_start => sub {
5058     my ($self, $item, $element_state) = @_;
## Initial state of the child-order state machine.
5059     $element_state->{phase} = 'before caption';
5060 wakaba 1.66
5061     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
5062 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
5063     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
5064 wakaba 1.40 },
5065     check_child_element => sub {
5066     my ($self, $item, $child_el, $child_nsuri, $child_ln,
5067     $child_is_transparent, $element_state) = @_;
5068 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
5069     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
5070 wakaba 1.40 $self->{onerror}->(node => $child_el,
5071     type => 'element not allowed:minus',
5072 wakaba 1.104 level => $self->{level}->{must});
5073 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
5074     #
## Phase 'in tbodys': further |tbody| elements, or one trailing
## |tfoot| if none has been seen yet.
5075     } elsif ($element_state->{phase} eq 'in tbodys') {
5076     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5077     #$element_state->{phase} = 'in tbodys';
5078     } elsif (not $element_state->{has_tfoot} and
5079     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5080     $element_state->{phase} = 'after tfoot';
5081     $element_state->{has_tfoot} = 1;
5082     } else {
5083 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5084     level => $self->{level}->{must});
5085 wakaba 1.40 }
## Phase 'in trs': further |tr| elements, or one trailing |tfoot| if
## none has been seen yet.
5086     } elsif ($element_state->{phase} eq 'in trs') {
5087     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5088     #$element_state->{phase} = 'in trs';
5089     } elsif (not $element_state->{has_tfoot} and
5090     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5091     $element_state->{phase} = 'after tfoot';
5092     $element_state->{has_tfoot} = 1;
5093     } else {
5094 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5095     level => $self->{level}->{must});
5096 wakaba 1.40 }
## Phase 'after thead': |tbody|, |tr| or an early |tfoot|.
5097     } elsif ($element_state->{phase} eq 'after thead') {
5098     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5099     $element_state->{phase} = 'in tbodys';
5100     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5101     $element_state->{phase} = 'in trs';
5102     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5103     $element_state->{phase} = 'in tbodys';
5104     $element_state->{has_tfoot} = 1;
5105     } else {
5106 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5107     level => $self->{level}->{must});
5108 wakaba 1.40 }
## Phase 'in colgroup': more |colgroup| elements, or any of the
## later sections.
5109     } elsif ($element_state->{phase} eq 'in colgroup') {
5110     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
5111     $element_state->{phase} = 'in colgroup';
5112     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
5113     $element_state->{phase} = 'after thead';
5114     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5115     $element_state->{phase} = 'in tbodys';
5116     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5117     $element_state->{phase} = 'in trs';
5118     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5119     $element_state->{phase} = 'in tbodys';
5120     $element_state->{has_tfoot} = 1;
5121     } else {
5122 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5123     level => $self->{level}->{must});
5124 wakaba 1.40 }
## Phase 'before caption' (initial): as 'in colgroup', plus an
## optional |caption|, which is also recorded in the parent's state.
5125     } elsif ($element_state->{phase} eq 'before caption') {
5126     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
5127 wakaba 1.181 $item->{parent_state}->{table_caption_element} = $child_el;
5128 wakaba 1.40 $element_state->{phase} = 'in colgroup';
5129     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
5130     $element_state->{phase} = 'in colgroup';
5131     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
5132     $element_state->{phase} = 'after thead';
5133     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5134     $element_state->{phase} = 'in tbodys';
5135     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5136     $element_state->{phase} = 'in trs';
5137     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5138     $element_state->{phase} = 'in tbodys';
5139     $element_state->{has_tfoot} = 1;
5140     } else {
5141 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5142     level => $self->{level}->{must});
5143 wakaba 1.40 }
## Phase 'after tfoot': nothing more is allowed.
5144     } elsif ($element_state->{phase} eq 'after tfoot') {
5145 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5146     level => $self->{level}->{must});
5147 wakaba 1.40 } else {
5148     die "check_child_element: Bad |table| phase: $element_state->{phase}";
5149     }
5150     },
5151     check_child_text => sub {
5152     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
5153     if ($has_significant) {
5154 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
5155     level => $self->{level}->{must});
5156 wakaba 1.1 }
5157 wakaba 1.40 },
5158     check_end => sub {
5159     my ($self, $item, $element_state) = @_;
5160 wakaba 1.1
5161     ## Table model errors
## The table structure built here is also appended to
## |$self->{return}->{table}|.
5162     require Whatpm::HTMLTable;
5163 wakaba 1.87 my $table = Whatpm::HTMLTable->form_table ($item->{node}, sub {
5164 wakaba 1.104 $self->{onerror}->(@_);
5165     }, $self->{level});
5166 wakaba 1.87 Whatpm::HTMLTable->assign_header
5167 wakaba 1.104 ($table, $self->{onerror}, $self->{level});
5168 wakaba 1.87 push @{$self->{return}->{table}}, $table;
5169 wakaba 1.1
5170 wakaba 1.40 $HTMLChecker{check_end}->(@_);
5171 wakaba 1.1 },
5172     };
5173    
## |caption| element: flow content, with |table| added to the
## excluded ("minus") elements for as long as the caption is open.
$Element->{$HTML_NS}->{caption} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      (map { ($_ => 1) } qw/top bottom left right/),
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, undef, $element_state) = @_;

    ## Register the exclusion first, then run the generic flow
    ## content start handling.
    my $excluded = {$HTML_NS => {table => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, undef, $element_state) = @_;

    ## Undo the exclusion registered by |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # caption
5200 wakaba 1.1
## Attribute checkers for the HTML4 cell alignment attributes.  This
## hash is spread into the attribute checker tables of elements that
## accept those attributes.
my %cellalign = (
  ## HTML4 %cellhalign;
  align => $GetHTMLEnumeratedAttrChecker->({
    (map { ($_ => 1) } qw/left center right justify char/),
  }),
  char => sub {
    my ($self, $attr) = @_;

    ## NOTE: "character" or |%Character;| in HTML4.

    ## The value has to be exactly one character long.
    unless (length ($attr->value) == 1) {
      $self->{onerror}->(node => $attr, type => 'char:syntax error',
                         level => $self->{level}->{html4_fact});
    }
  },
  charoff => $HTMLLengthAttrChecker,

  ## HTML4 %cellvalign;
  valign => $GetHTMLEnumeratedAttrChecker->({
    (map { ($_ => 1) } qw/top middle bottom baseline/),
  }),
);
5224    
## |colgroup| element.
##
## Starts from the empty-element checker, then overrides the child
## checks: only HTML |col| children (and elements listed in
## |plus_elements|) pass silently, and significant text is an error.
$Element->{$HTML_NS}{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      %cellalign,
      ## |span| has to be greater than zero.
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col|
      ## elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for
      ## |<colgroup span="1"><any><col/></any></colgroup>| (though
      ## non-conforming)?
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) }
           qw/align char charoff valign width/),
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($checker, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($checker->{minus_elements}{$ns}{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $checker->{onerror}->(node => $child,
                            type => 'element not allowed:minus',
                            level => $checker->{level}{must});
    } elsif ($checker->{plus_elements}{$ns}{$ln} or
             ($ns eq $HTML_NS and $ln eq 'col')) {
      ## Allowed child: nothing to report.
    } else {
      $checker->{onerror}->(node => $child,
                            type => 'element not allowed',
                            level => $checker->{level}{must});
    }
  },
  check_child_text => sub {
    my ($checker, undef, $text_node, $is_significant) = @_;
    if ($is_significant) {
      $checker->{onerror}->(node => $text_node,
                            type => 'character not allowed',
                            level => $checker->{level}{must});
    }
  },
};
5271    
## |col| element: an empty element that takes the cell alignment
## attributes plus a |span| attribute.
$Element->{$HTML_NS}{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      %cellalign,
      ## |span| has to be greater than zero.
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) }
           qw/align char charoff valign width/),
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    },
  ),
};
5290    
## |tbody| element.
##
## Only HTML |tr| children (and elements listed in |plus_elements|)
## pass silently; any other child element and any significant text is
## reported.  This definition is also copied for |thead| and |tfoot|.
$Element->{$HTML_NS}{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      %cellalign,
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) } qw/align char charoff valign/),
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_child_element => sub {
    my ($checker, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($checker->{minus_elements}{$ns}{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $checker->{onerror}->(node => $child,
                            type => 'element not allowed:minus',
                            level => $checker->{level}{must});
    } elsif ($checker->{plus_elements}{$ns}{$ln} or
             ($ns eq $HTML_NS and $ln eq 'tr')) {
      ## Allowed child: nothing to report.
    } else {
      $checker->{onerror}->(node => $child,
                            type => 'element not allowed',
                            level => $checker->{level}{must});
    }
  },
  check_child_text => sub {
    my ($checker, undef, $text_node, $is_significant) = @_;
    if ($is_significant) {
      $checker->{onerror}->(node => $text_node,
                            type => 'character not allowed',
                            level => $checker->{level}{must});
    }
  },
};
5330    
## |thead| element: a shallow copy of the |tbody| definition.
$Element->{$HTML_NS}{thead} = { %{ $Element->{$HTML_NS}{tbody} } };
5334    
## |tfoot| element: a shallow copy of the |tbody| definition.
$Element->{$HTML_NS}{tfoot} = { %{ $Element->{$HTML_NS}{tbody} } };
5338    
## |tr| element.
##
## Only HTML |td| and |th| children (and elements listed in
## |plus_elements|) pass silently; any other child element and any
## significant text is reported.
$Element->{$HTML_NS}{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      %cellalign,
      bgcolor => $HTMLColorAttrChecker,
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) } qw/align char charoff valign/),
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_child_element => sub {
    my ($checker, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($checker->{minus_elements}{$ns}{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $checker->{onerror}->(node => $child,
                            type => 'element not allowed:minus',
                            level => $checker->{level}{must});
    } elsif ($checker->{plus_elements}{$ns}{$ln} or
             ($ns eq $HTML_NS and ($ln eq 'td' or $ln eq 'th'))) {
      ## Allowed child: nothing to report.
    } else {
      $checker->{onerror}->(node => $child,
                            type => 'element not allowed',
                            level => $checker->{level}{must});
    }
  },
  check_child_text => sub {
    my ($checker, undef, $text_node, $is_significant) = @_;
    if ($is_significant) {
      $checker->{onerror}->(node => $text_node,
                            type => 'character not allowed',
                            level => $checker->{level}{must});
    }
  },
};
5381    
## |td| element: flow content with the HTML4 cell attributes.
$Element->{$HTML_NS}{td} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      %cellalign,
      abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
      axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
      bgcolor => $HTMLColorAttrChecker,
      ## |colspan| has to be greater than zero.
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      headers => sub {
        ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
        ## Though that method does not check the |headers| attribute
        ## of a |td| element if the element does not form a table, in
        ## that case the |td| element is non-conforming anyway.
      },
      nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
      ## |rowspan| has to be greater than zero.
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->(
        +{ map { ($_ => 1) } qw/row col rowgroup colgroup/ }
      ),
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) } qw/align char charoff valign/),
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
           qw/bgcolor height nowrap width/),
      (map { ($_ => FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
           qw/abbr axis scope/),
      (map { ($_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
                    FEATURE_M12N10_REC) }
           qw/colspan rowspan/),
      headers => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
5421    
## |th| element: phrasing content with the HTML4 cell attributes.
$Element->{$HTML_NS}{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      %cellalign,
      abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
      axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
      bgcolor => $HTMLColorAttrChecker,
      ## |colspan| has to be greater than zero.
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## TODO: HTML4(?) |headers|
      nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
      ## |rowspan| has to be greater than zero.
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->(
        +{ map { ($_ => 1) } qw/row col rowgroup colgroup/ }
      ),
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) } qw/align char charoff valign/),
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
           qw/bgcolor height nowrap width/),
      (map { ($_ => FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
           qw/abbr axis headers/),
      (map { ($_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
                    FEATURE_M12N10_REC) }
           qw/colspan rowspan scope/),
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
5456    
## |form| element.
##
## Content is checked as flow content.  |check_start| registers HTML
## |form| through |_add_minus_elements| and records URI and ID type
## information in the element state; |check_end| undoes the
## registration and delegates to the flow content checker.
$Element->{$HTML_NS}{form} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      accept => $AcceptAttrChecker,
      'accept-charset' => $HTMLCharsetsAttrChecker,
      ## TODO: Warn if submission is not defined for the scheme
      action => $HTMLURIAttrChecker,
      ## TODO: MUST point ... [WF2]
      data => $HTMLURIAttrChecker,
      enctype => $GetHTMLEnumeratedAttrChecker->(
        +{ map { ($_ => 1) } 'application/x-www-form-urlencoded',
                             'multipart/form-data',
                             'text/plain' }
      ),
      method => $GetHTMLEnumeratedAttrChecker->(
        +{ map { ($_ => 1) } qw/get post put delete/ }
      ),
      name => sub {
        my ($checker, $attr_node) = @_;

        ## The name must be non-empty and must not repeat a name
        ## already recorded in |$checker->{form}|.
        my $form_name = $attr_node->value;
        if ($form_name eq '') {
          $checker->{onerror}->(type => 'empty form name',
                                node => $attr_node,
                                level => $checker->{level}{must});
        } elsif ($checker->{form}{$form_name}) {
          $checker->{onerror}->(type => 'duplicate form name',
                                node => $attr_node,
                                value => $form_name,
                                level => $checker->{level}{must});
        } else {
          $checker->{form}{$form_name} = 1;
        }
      },
      novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
      ## TODO: Tests for following attrs:
      onformchange => $HTMLEventHandlerAttrChecker,
      onforminput => $HTMLEventHandlerAttrChecker,
      onreceived => $HTMLEventHandlerAttrChecker,
      replace => $GetHTMLEnumeratedAttrChecker->(
        +{ map { ($_ => 1) } qw/document values/ }
      ),
      target => $HTMLTargetAttrChecker,
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_WF2X |
                    FEATURE_M12N10_REC) }
           qw/action enctype method/),
      ## NOTE: |name| was formerly marked as
      ## FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED.
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw/accept-charset name onreset onsubmit target/),
      (map { ($_ => FEATURE_WF2) } qw/data onreceived replace/),
      (map { ($_ => FEATURE_WF2_INFORMATIVE) } qw/onformchange onforminput/),
      (map { ($_ => FEATURE_HTML20_RFC) } qw/sdapref sdasuff/),
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      novalidate => FEATURE_HTML5_DEFAULT,
    },
  ),
  check_start => sub {
    my ($checker, undef, $state) = @_;
    $checker->_add_minus_elements ($state, {$HTML_NS => {form => 1}});

    ## URI type information, keyed by attribute name.
    $state->{uri_info}{action}{type}{action} = 1;
    for my $uri_attr (qw/data template ref/) {
      $state->{uri_info}{$uri_attr}{type}{resource} = 1;
    }
    $state->{id_type} = 'form';
  },
  check_end => sub {
    my ($checker, undef, $state) = @_;
    $checker->_remove_minus_elements ($state);

    ## Delegate with the original, untouched argument list.
    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
5539    
## |fieldset| element.
##
## Content model: |legend|, then flow content.  The element state
## flags |has_legend| and |has_non_legend| track what has been seen
## so far: a |legend| that comes after any other content is reported,
## and a missing |legend| is reported at the end of the element.
$Element->{$HTML_NS}{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      form => $HTMLFormAttrChecker,
      name => $FormControlNameAttrChecker,
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_WF2X) }
           qw/disabled form/),
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_DEFAULT,
    },
  ),
  ## NOTE: legend, Flow
  check_child_element => sub {
    my ($checker, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($checker->{minus_elements}{$ns}{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $checker->{onerror}->(node => $child,
                            type => 'element not allowed:minus',
                            level => $checker->{level}{must});
      $state->{has_non_legend} = 1;
    } elsif ($checker->{plus_elements}{$ns}{$ln}) {
      ## Explicitly allowed: nothing to report, flags unchanged.
    } elsif ($ns eq $HTML_NS and $ln eq 'legend') {
      ## A |legend| is only accepted before any other content,
      ## including another |legend|.
      if ($state->{has_non_legend}) {
        $checker->{onerror}->(node => $child,
                              type => 'element not allowed:details legend',
                              level => $checker->{level}{must});
      }
      $state->{has_legend} = $state->{has_non_legend} = 1;
    } else {
      ## Delegate with the original, untouched argument list.
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $is_transparent or $state->{has_non_legend} = 1;
      ## TODO:
      ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
      ## should be an error, since |object| is allowed as flow,
      ## therefore |details| part of the content model does not match.
    }
  },
  check_child_text => sub {
    my ($checker, undef, $text_node, $is_significant, $state) = @_;
    if ($is_significant) {
      $state->{has_non_legend} = 1;
    }
  },
  check_end => sub {
    my ($checker, $item, $state) = @_;

    $state->{has_legend}
        or $checker->{onerror}->(node => $item->{node},
                                 type => 'child element missing',
                                 text => 'legend',
                                 level => $checker->{level}{must});

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
  ## NOTE: This definition is partially reused by |details| element's
  ## checker.
};
5606    
5607     $Element->{$HTML_NS}->{input} = {
5608 wakaba 1.119 %HTMLEmptyChecker,
5609     status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5610 wakaba 1.140 check_attrs => sub {
5611     my ($self, $item, $element_state) = @_;
5612 wakaba 1.142
5613 wakaba 1.145 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5614 wakaba 1.142 $state = 'text' unless defined $state;
5615     $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5616    
5617 wakaba 1.140 for my $attr (@{$item->{node}->attributes}) {
5618     my $attr_ns = $attr->namespace_uri;
5619     $attr_ns = '' unless defined $attr_ns;
5620     my $attr_ln = $attr->manakai_local_name;
5621     my $checker;
5622     my $status;
5623     if ($attr_ns eq '') {
5624     $status =
5625     {
5626     %HTMLAttrStatus,
5627     %HTMLM12NCommonAttrStatus,
5628     accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5629     'accept-charset' => FEATURE_HTML2X_RFC,
5630 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
5631 wakaba 1.140 action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5632     align => FEATURE_M12N10_REC_DEPRECATED,
5633     alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5634     autocomplete => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5635     autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5636     checked => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5637     datafld => FEATURE_HTML4_REC_RESERVED,
5638     dataformatas => FEATURE_HTML4_REC_RESERVED,
5639     datasrc => FEATURE_HTML4_REC_RESERVED,
5640     disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5641     enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5642     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5643 wakaba 1.178 height => FEATURE_HTML5_LC,
5644 wakaba 1.150 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5645     FEATURE_XHTMLBASIC11_CR,
5646 wakaba 1.140 ismap => FEATURE_M12N10_REC,
5647 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5648 wakaba 1.140 list => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5649     max => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5650 wakaba 1.150 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X |
5651     FEATURE_M12N10_REC,
5652 wakaba 1.140 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5653     min => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5654 wakaba 1.156 multiple => FEATURE_HTML5_DEFAULT,
5655 wakaba 1.140 name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5656 wakaba 1.161 novalidate => FEATURE_HTML5_DEFAULT,
5657 wakaba 1.140 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5658     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5659     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5660     onformchange => FEATURE_WF2_INFORMATIVE,
5661     onforminput => FEATURE_WF2_INFORMATIVE,
5662     oninput => FEATURE_WF2,
5663     oninvalid => FEATURE_WF2,
5664     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5665     pattern => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5666 wakaba 1.156 placeholder => FEATURE_HTML5_DEFAULT,
5667 wakaba 1.140 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5668     replace => FEATURE_WF2,
5669     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5670     sdapref => FEATURE_HTML20_RFC,
5671 wakaba 1.154 size => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5672 wakaba 1.140 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5673     step => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5674     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5675     target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5676 wakaba 1.161 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO:dropped
5677 wakaba 1.140 type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5678     usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
5679     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5680 wakaba 1.178 width => FEATURE_HTML5_LC,
5681 wakaba 1.140 }->{$attr_ln};
5682    
5683     $checker =
5684     {
5685 wakaba 1.141 ## NOTE: Value of an empty string means that the attribute is only
5686     ## applicable for a specific set of states.
5687 wakaba 1.142 accept => '',
5688 wakaba 1.149 'accept-charset' => $HTMLCharsetsAttrChecker,
5689     ## NOTE: To which states it applies is not defined in RFC 2070.
5690 wakaba 1.142 action => '',
5691 wakaba 1.150 align => '',
5692 wakaba 1.141 alt => '',
5693 wakaba 1.142 autocomplete => '',
5694 wakaba 1.165 autofocus => $AutofocusAttrChecker,
5695     ## NOTE: <input type=hidden disabled> is not disallowed.
5696 wakaba 1.142 checked => '',
5697     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
5698 wakaba 1.165 ## NOTE: <input type=hidden disabled> is not disallowed.
5699 wakaba 1.142 enctype => '',
5700     form => $HTMLFormAttrChecker,
5701 wakaba 1.178 height => '',
5702 wakaba 1.150 inputmode => '',
5703     ismap => '', ## NOTE: "MUST" be type=image [HTML4]
5704 wakaba 1.142 list => '',
5705     max => '',
5706     maxlength => '',
5707     method => '',
5708     min => '',
5709 wakaba 1.156 multiple => '',
5710 wakaba 1.165 name => $FormControlNameAttrChecker,
5711 wakaba 1.166 novalidate => '',
5712 wakaba 1.149 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
5713     onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
5714     oninput => $HTMLEventHandlerAttrChecker, # [WF2]
5715     oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
5716     ## TODO: tests for four attributes above
5717 wakaba 1.142 pattern => '',
5718 wakaba 1.156 placeholder => '',
5719 wakaba 1.142 readonly => '',
5720 wakaba 1.150 replace => '',
5721 wakaba 1.142 required => '',
5722     size => '',
5723     src => '',
5724     step => '',
5725     target => '',
5726 wakaba 1.140 type => $GetHTMLEnumeratedAttrChecker->({
5727 wakaba 1.156 hidden => 1, text => 1, search => 1, url => 1,
5728     email => 1, password => 1,
5729 wakaba 1.141 datetime => 1, date => 1, month => 1, week => 1, time => 1,
5730 wakaba 1.157 'datetime-local' => 1, number => 1, range => 1, color => 1,
5731     checkbox => 1,
5732 wakaba 1.141 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
5733     button => 1,
5734 wakaba 1.140 }),
5735 wakaba 1.151 usemap => '',
5736 wakaba 1.142 value => '',
5737 wakaba 1.178 width => '',
5738 wakaba 1.140 }->{$attr_ln};
5739 wakaba 1.141
5740     ## State-dependent checkers
5741     unless ($checker) {
5742     if ($state eq 'hidden') {
5743     $checker =
5744     {
5745 wakaba 1.142 value => sub {
5746     my ($self, $attr, $item, $element_state) = @_;
5747 wakaba 1.145 my $name = $item->{node}->get_attribute_ns (undef, 'name');
5748 wakaba 1.142 if (defined $name and $name eq '_charset_') { ## case-sensitive
5749     $self->{onerror}->(node => $attr,
5750     type => '_charset_ value',
5751     level => $self->{level}->{must});
5752     }
5753     },
5754 wakaba 1.141 }->{$attr_ln} || $checker;
5755 wakaba 1.142 ## TODO: Warn if no name attribute?
5756     ## TODO: Warn if name!=_charset_ and no value attribute?
5757 wakaba 1.168 } elsif ({
5758     datetime => 1, date => 1, month => 1, time => 1,
5759     week => 1, 'datetime-local' => 1,
5760     }->{$state}) {
5761     my $v = {
5762     datetime => ['global_date_and_time_string'],
5763     date => ['date_string'],
5764     month => ['month_string'],
5765     week => ['week_string'],
5766     time => ['time_string'],
5767     'datetime-local' => ['local_date_and_time_string'],
5768     }->{$state};
5769 wakaba 1.144 $checker =
5770     {
5771 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5772     on => 1, off => 1,
5773     }),
5774 wakaba 1.158 list => $ListAttrChecker,
5775 wakaba 1.168 min => $GetDateTimeAttrChecker->($v->[0]),
5776     max => $GetDateTimeAttrChecker->($v->[0]),
5777 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5778 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5779 wakaba 1.148 step => $StepAttrChecker,
5780 wakaba 1.168 value => $GetDateTimeAttrChecker->($v->[0]),
5781 wakaba 1.144 }->{$attr_ln} || $checker;
5782     } elsif ($state eq 'number') {
5783     $checker =
5784     {
5785 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5786     on => 1, off => 1,
5787     }),
5788 wakaba 1.158 list => $ListAttrChecker,
5789 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5790     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5791 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5792 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5793 wakaba 1.148 step => $StepAttrChecker,
5794 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5795 wakaba 1.144 }->{$attr_ln} || $checker;
5796     } elsif ($state eq 'range') {
5797     $checker =
5798     {
5799 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5800     on => 1, off => 1,
5801     }),
5802 wakaba 1.158 list => $ListAttrChecker,
5803 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5804     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5805 wakaba 1.148 step => $StepAttrChecker,
5806 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5807 wakaba 1.144 }->{$attr_ln} || $checker;
5808 wakaba 1.157 } elsif ($state eq 'color') {
5809     $checker =
5810     {
5811     autocomplete => $GetHTMLEnumeratedAttrChecker->({
5812     on => 1, off => 1,
5813     }),
5814 wakaba 1.158 list => $ListAttrChecker,
5815 wakaba 1.157 value => sub {
5816     my ($self, $attr) = @_;
5817     unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
5818     $self->{onerror}->(node => $attr,
5819     type => 'scolor:syntax error', ## TODOC: type
5820     level => $self->{level}->{must});
5821     }
5822     },
5823     }->{$attr_ln} || $checker;
5824 wakaba 1.144 } elsif ($state eq 'checkbox' or $state eq 'radio') {
5825     $checker =
5826     {
5827 wakaba 1.149 checked => $GetHTMLBooleanAttrChecker->('checked'),
5828     ## TODO: tests
5829 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5830 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5831     }->{$attr_ln} || $checker;
5832     ## TODO: There MUST be another input type=radio with same
5833     ## name (Radio state).
5834     ## ISSUE: There should be exactly one type=radio with checked?
5835     } elsif ($state eq 'file') {
5836     $checker =
5837     {
5838 wakaba 1.161 accept => $AcceptAttrChecker,
5839 wakaba 1.168 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
5840 wakaba 1.159 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
5841 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5842 wakaba 1.144 }->{$attr_ln} || $checker;
5843     } elsif ($state eq 'submit') {
5844     $checker =
5845     {
5846 wakaba 1.149 action => $HTMLURIAttrChecker,
5847 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5848     'application/x-www-form-urlencoded' => 1,
5849     'multipart/form-data' => 1,
5850     'text/plain' => 1,
5851     }),
5852 wakaba 1.149 method => $GetHTMLEnumeratedAttrChecker->({
5853     get => 1, post => 1, put => 1, delete => 1,
5854     }),
5855 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5856 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5857     document => 1, values => 1,
5858     }),
5859     target => $HTMLTargetAttrChecker,
5860 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5861     }->{$attr_ln} || $checker;
5862     } elsif ($state eq 'image') {
5863     $checker =
5864     {
5865 wakaba 1.149 action => $HTMLURIAttrChecker,
5866     align => $GetHTMLEnumeratedAttrChecker->({
5867     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
5868     }),
5869 wakaba 1.144 alt => sub {
5870     my ($self, $attr) = @_;
5871     my $value = $attr->value;
5872     unless (length $value) {
5873     $self->{onerror}->(node => $attr,
5874     type => 'empty anchor image alt',
5875     level => $self->{level}->{must});
5876     }
5877     },
5878 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5879     'application/x-www-form-urlencoded' => 1,
5880     'multipart/form-data' => 1,
5881     'text/plain' => 1,
5882     }),
5883 wakaba 1.178 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
5884 wakaba 1.149 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
5885     method => $GetHTMLEnumeratedAttrChecker->({
5886     get => 1, post => 1, put => 1, delete => 1,
5887     }),
5888 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5889 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5890     document => 1, values => 1,
5891     }),
5892 wakaba 1.144 src => $HTMLURIAttrChecker,
5893     ## TODO: There is requirements on the referenced resource.
5894 wakaba 1.149 target => $HTMLTargetAttrChecker,
5895     usemap => $HTMLUsemapAttrChecker,
5896 wakaba 1.178 width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
5897 wakaba 1.144 }->{$attr_ln} || $checker;
5898     ## TODO: alt & src are required.
5899     } elsif ({
5900     reset => 1, button => 1,
5901     ## NOTE: From Web Forms 2.0:
5902     remove => 1, 'move-up' => 1, 'move-down' => 1,
5903     add => 1,
5904     }->{$state}) {
5905     $checker =
5906     {
5907     ## NOTE: According to Web Forms 2.0, |input| attribute
5908     ## has |template| attribute to support the |add| button
5909     ## type (as part of the repetition template feature). It
5910     ## conflicts with the |template| global attribute
5911     ## introduced as part of the data template feature.
5912     ## NOTE: |template| attribute as defined in Web Forms 2.0
5913     ## has no author requirement.
5914     value => sub { }, ## NOTE: No restriction.
5915     }->{$attr_ln} || $checker;
5916 wakaba 1.156 } else { # Text, Search, E-mail, URL, Password
5917 wakaba 1.141 $checker =
5918     {
5919 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5920     on => 1, off => 1,
5921     }),
5922 wakaba 1.149 ## TODO: inputmode [WF2]
5923 wakaba 1.158 list => $ListAttrChecker,
5924 wakaba 1.147 maxlength => sub {
5925     my ($self, $attr, $item, $element_state) = @_;
5926    
5927     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
5928    
5929 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
5930 wakaba 1.147 ## NOTE: Applying the rules for parsing non-negative
5931     ## integers results in a number.
5932     my $max_allowed_value_length = 0+$1;
5933    
5934     my $value = $item->{node}->get_attribute_ns (undef, 'value');
5935     if (defined $value) {
5936     my $codepoint_length = length $value;
5937 wakaba 1.162
5938 wakaba 1.147 if ($codepoint_length > $max_allowed_value_length) {
5939     $self->{onerror}
5940     ->(node => $item->{node}
5941     ->get_attribute_node_ns (undef, 'value'),
5942     type => 'value too long',
5943     level => $self->{level}->{must});
5944     }
5945     }
5946     }
5947     },
5948 wakaba 1.160 pattern => $PatternAttrChecker,
5949 wakaba 1.179 placeholder => $PlaceholderAttrChecker,
5950 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5951 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5952 wakaba 1.147 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
5953 wakaba 1.143 value => sub {
5954 wakaba 1.156 my ($self, $attr, $item, $element_state) = @_;
5955     if ($state eq 'url') {
5956     $HTMLURIAttrChecker->(@_);
5957     } elsif ($state eq 'email') {
5958     if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
5959     my @addr = split /,/, $attr->value, -1;
5960     @addr = ('') unless @addr;
5961     for (@addr) {
5962 wakaba 1.165 s/\A[\x09\x0A\x0C\x0D\x20]+//;
5963     s/[\x09\x0A\x0C\x0D\x20]\z//;
5964 wakaba 1.156 unless (/\A$ValidEmailAddress\z/) {
5965     $self->{onerror}->(node => $attr,
5966     type => 'email:syntax error', ## TODO: type
5967     value => $_,
5968     level => $self->{level}->{must});
5969     }
5970     }
5971     } else {
5972     unless ($attr->value =~ /\A$ValidEmailAddress\z/) {
5973     $self->{onerror}->(node => $attr,
5974     type => 'email:syntax error', ## TODO: type
5975     level => $self->{level}->{must});
5976     }
5977     }
5978     } else {
5979     if ($attr->value =~ /[\x0D\x0A]/) {
5980     $self->{onerror}->(node => $attr,
5981     type => 'newline in value', ## TODO: type
5982     level => $self->{level}->{must});
5983     }
5984     }
5985 wakaba 1.143 },
5986 wakaba 1.141 }->{$attr_ln} || $checker;
5987 wakaba 1.147 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
5988 wakaba 1.156 $checker = $GetHTMLBooleanAttrChecker->('multiple')
5989     if $state eq 'email' and $attr_ln eq 'multiple';
5990 wakaba 1.161
5991     if ($item->{node}->has_attribute_ns (undef, 'pattern') and
5992     not $item->{node}->has_attribute_ns (undef, 'title')) {
5993     $self->{onerror}->(node => $item->{node},
5994     type => 'attribute missing',
5995     text => 'title',
5996     level => $self->{level}->{should});
5997     }
5998 wakaba 1.141 }
5999     }
6000    
6001     if (defined $checker) {
6002     if ($checker eq '') {
6003     $checker = sub {
6004     my ($self, $attr) = @_;
6005     $self->{onerror}->(node => $attr,
6006     type => 'input attr not applicable',
6007     text => $state,
6008     level => $self->{level}->{must});
6009     };
6010     }
6011 wakaba 1.140 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
6012     $attr_ln !~ /[A-Z]/) {
6013     $checker = $HTMLDatasetAttrChecker;
6014     $status = $HTMLDatasetAttrStatus;
6015     } else {
6016     $checker = $HTMLAttrChecker->{$attr_ln};
6017     }
6018     }
6019     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
6020     || $AttrChecker->{$attr_ns}->{''};
6021     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
6022     || $AttrStatus->{$attr_ns}->{''};
6023     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
6024 wakaba 1.157
6025 wakaba 1.140 if ($checker) {
6026     $checker->($self, $attr, $item, $element_state) if ref $checker;
6027     } elsif ($attr_ns eq '' and not $status) {
6028     #
6029     } else {
6030     $self->{onerror}->(node => $attr,
6031     type => 'unknown attribute',
6032     level => $self->{level}->{uncertain});
6033     ## ISSUE: No conformance criteria for unknown attributes in the spec
6034     }
6035    
6036     $self->_attr_status_info ($attr, $status);
6037     }
6038 wakaba 1.168
6039     ## ISSUE: -0/+0
6040    
6041     if ($state eq 'range') {
6042     $element_state->{number_value}->{min} ||= 0;
6043     $element_state->{number_value}->{max} = 100
6044     unless defined $element_state->{number_value}->{max};
6045     }
6046    
6047     if (defined $element_state->{date_value}->{min} or
6048     defined $element_state->{date_value}->{max}) {
6049     my $min_value = $element_state->{date_value}->{min};
6050     my $max_value = $element_state->{date_value}->{max};
6051     my $value_value = $element_state->{date_value}->{value};
6052    
6053     if (defined $min_value and $min_value eq '' and
6054     (defined $max_value or defined $value_value)) {
6055     my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6056     $self->{onerror}->(node => $min,
6057     type => 'date value not supported', ## TODOC: type
6058     value => $min->value,
6059     level => $self->{level}->{unsupported});
6060     undef $min_value;
6061     }
6062     if (defined $max_value and $max_value eq '' and
6063     (defined $max_value or defined $value_value)) {
6064     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6065     $self->{onerror}->(node => $max,
6066     type => 'date value not supported', ## TODOC: type
6067     value => $max->value,
6068     level => $self->{level}->{unsupported});
6069     undef $max_value;
6070     }
6071     if (defined $value_value and $value_value eq '' and
6072     (defined $max_value or defined $min_value)) {
6073     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6074     $self->{onerror}->(node => $value,
6075     type => 'date value not supported', ## TODOC: type
6076     value => $value->value,
6077     level => $self->{level}->{unsupported});
6078     undef $value_value;
6079     }
6080    
6081     if (defined $min_value and defined $max_value) {
6082     if ($min_value->to_html5_number > $max_value->to_html5_number) {
6083     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6084     $self->{onerror}->(node => $max,
6085     type => 'max lt min', ## TODOC: type
6086     level => $self->{level}->{must});
6087     }
6088     }
6089    
6090     if (defined $min_value and defined $value_value) {
6091     if ($min_value->to_html5_number > $value_value->to_html5_number) {
6092     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6093     $self->{onerror}->(node => $value,
6094     type => 'value lt min', ## TODOC: type
6095     level => $self->{level}->{warn});
6096     ## NOTE: Not an error.
6097     }
6098     }
6099    
6100     if (defined $max_value and defined $value_value) {
6101     if ($max_value->to_html5_number < $value_value->to_html5_number) {
6102     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6103     $self->{onerror}->(node => $value,
6104     type => 'value gt max', ## TODOC: type
6105     level => $self->{level}->{warn});
6106     ## NOTE: Not an error.
6107     }
6108     }
6109     } elsif (defined $element_state->{number_value}->{min} or
6110     defined $element_state->{number_value}->{max}) {
6111     my $min_value = $element_state->{number_value}->{min};
6112     my $max_value = $element_state->{number_value}->{max};
6113     my $value_value = $element_state->{number_value}->{value};
6114    
6115     if (defined $min_value and defined $max_value) {
6116     if ($min_value > $max_value) {
6117     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6118     $self->{onerror}->(node => $max,
6119     type => 'max lt min', ## TODOC: type
6120     level => $self->{level}->{must});
6121     }
6122     }
6123    
6124     if (defined $min_value and defined $value_value) {
6125     if ($min_value > $value_value) {
6126     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6127     $self->{onerror}->(node => $value,
6128     type => 'value lt min', ## TODOC: type
6129     level => $self->{level}->{warn});
6130     ## NOTE: Not an error.
6131     }
6132     }
6133    
6134     if (defined $max_value and defined $value_value) {
6135     if ($max_value < $value_value) {
6136     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6137     $self->{onerror}->(node => $value,
6138     type => 'value gt max', ## TODOC: type
6139     level => $self->{level}->{warn});
6140     ## NOTE: Not an error.
6141     }
6142     }
6143     }
6144 wakaba 1.150
6145 wakaba 1.168 ## TODO: Warn unless value = min * x where x is an integer.
6146    
6147 wakaba 1.150 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6148     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6149     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6150     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6151     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6152 wakaba 1.140 },
check_start => sub {
  ## Start-of-element hook for this form control.
  ##
  ## Arguments: the checker object, the item being checked (its
  ## |node| is used as the error location) and the per-element state
  ## hash.
  my ($self, $item, $element_state) = @_;

  ## Both flags set: an enclosing |label| (|has_label|) already has a
  ## labelable association recorded (|has_labelable|), so this
  ## control is reported as an additional one.
  my $label_already_used = ($self->{flag}->{has_label} and
                            $self->{flag}->{has_labelable});
  if ($label_already_used) {
    $self->{onerror}->(
      node  => $item->{node},
      type  => 'multiple labelable fae',
      level => $self->{level}->{must},
    );
  } else {
    ## Record that a labelable control has been seen.
    $self->{flag}->{has_labelable} = 2;
  }

  ## IDs declared on this element are of the "labelable" kind.
  $element_state->{id_type} = 'labelable';
},
6165 wakaba 1.52 };
6166    
6167 wakaba 1.178 ## XXXresource: Dimension attributes have requirements on width and
6168     ## height of referenced resource.
6169 wakaba 1.80
## Checker definition for the |button| element.  The content model is
## inherited from |%HTMLPhrasingContentChecker|; this entry adds the
## attribute checkers, the attribute status table and a start hook
## that handles |label| association and URI attribute typing.
$Element->{$HTML_NS}->{button} = {
  %HTMLPhrasingContentChecker, ## ISSUE: -interactive?
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## First argument: attribute name => value checker.
    {
      ## ISSUE: In HTML5, no "MUST NOT" for using |action|, |method|,
      ## |enctype|, |target|, and |novalidate| with non-|submit|-|type|
      ## |button| elements.
      action => $HTMLURIAttrChecker,
      autofocus => $AutofocusAttrChecker,
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      enctype => $GetHTMLEnumeratedAttrChecker->(+{
        map { $_ => 1 } qw(
          application/x-www-form-urlencoded
          multipart/form-data
          text/plain
        )
      }),
      form => $HTMLFormAttrChecker,
      method => $GetHTMLEnumeratedAttrChecker->(+{
        map { $_ => 1 } qw(get post put delete)
      }),
      name => $FormControlNameAttrChecker,
      novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
      onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
      onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
      replace => $GetHTMLEnumeratedAttrChecker->(+{
        map { $_ => 1 } qw(document values)
      }),
      target => $HTMLTargetAttrChecker,
      ## NOTE: According to Web Forms 2.0, |button| attribute has
      ## |template| attribute to support the |add| button type (as
      ## part of repetition template feature).  It conflicts with the
      ## |template| global attribute introduced as part of the data
      ## template feature.
      ## NOTE: |template| attribute as defined in Web Forms 2.0 has no
      ## author requirement.
      type => $GetHTMLEnumeratedAttrChecker->(+{
        map { $_ => 1 } qw(button submit reset)
      }),
      value => sub {}, ## NOTE: No restriction.
    },
    ## Second argument: attribute name => feature status bits.  The
    ## explicit entries below override the two inherited tables.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey    => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
      action       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      autofocus    => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      datafld      => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc      => FEATURE_HTML4_REC_RESERVED,
      disabled     => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      enctype      => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      form         => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      method       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      name         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      novalidate   => FEATURE_HTML5_DEFAULT,
      onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onformchange => FEATURE_WF2_INFORMATIVE,
      onforminput  => FEATURE_WF2_INFORMATIVE,
      replace      => FEATURE_WF2,
      tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      template     => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO: dropped
      type         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      value        => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    ## Start-of-element hook.  Arguments: the checker object, the
    ## item being checked and the per-element state hash.
    my ($self, $item, $element_state) = @_;

    ## An enclosing |label| that already has a labelable association
    ## recorded makes this control an additional one.
    my $label_already_used = ($self->{flag}->{has_label} and
                              $self->{flag}->{has_labelable});
    if ($label_already_used) {
      $self->{onerror}->(
        node  => $item->{node},
        type  => 'multiple labelable fae',
        level => $self->{level}->{must},
      );
    } else {
      ## Record that a labelable control has been seen.
      $self->{flag}->{has_labelable} = 2;
    }

    ## ISSUE: "The value attribute must not be present unless the form
    ## [content] attribute is present.": Wrong?

    ## Classify the URI-valued attributes: |action| is an action URI,
    ## the others refer to resources.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    for my $uri_attr (qw(datasrc template ref)) {
      $element_state->{uri_info}->{$uri_attr}->{type}->{resource} = 1;
    }

    ## IDs declared on this element are of the "labelable" kind.
    $element_state->{id_type} = 'labelable';
  },
};
6253    
## Checker definition for the |label| element.  The content model is
## phrasing content with nested |label| elements excluded.  The start
## and end hooks maintain |$self->{flag}->{has_label}| and
## |$self->{flag}->{has_labelable}|, which the form control checkers
## consult.
$Element->{$HTML_NS}->{label} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC
      | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->(
    ## First argument: attribute name => value checker.
    {
      for => sub {
        my ($self, $attr) = @_;

        ## NOTE: MUST be an ID of a labelable element.

        ## Queue the reference as [ID kind, value, attribute node].
        push @{ $self->{idref} }, ['labelable', $attr->value, $attr];
      },
      form => $HTMLFormAttrChecker,
    },
    ## Second argument: attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      accesskey => FEATURE_HTML5_FD | FEATURE_WF2 | FEATURE_M12N10_REC,
      for       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      form      => FEATURE_HTML5_DEFAULT,
      lang      => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      onblur    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    ## Start-of-element hook.  Arguments: the checker object, the
    ## item being checked and the per-element state hash.
    my ($self, $item, $element_state) = @_;

    ## A |label| must not contain another |label|.
    $self->_add_minus_elements($element_state, {$HTML_NS => {label => 1}});

    ## Save the outer values of both flags so that |check_end| can
    ## restore them, then set the values for this |label|.
    $element_state->{has_label_original} = $self->{flag}->{has_label};
    $self->{flag}->{has_label} = 1;
    $element_state->{has_labelable_original}
        = $self->{flag}->{has_labelable};
    if ($item->{node}->has_attribute_ns(undef, 'for')) {
      ## The |for| attribute already names the labeled control.
      $self->{flag}->{has_labelable} = 1;
    } else {
      $self->{flag}->{has_labelable} = 0;
    }

    for my $uri_attr (qw(template ref)) {
      $element_state->{uri_info}->{$uri_attr}->{type}->{resource} = 1;
    }
  },
  check_end => sub {
    ## End-of-element hook.  It takes the same arguments as
    ## |check_start| and forwards them unchanged to the phrasing
    ## content end hook.
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements($element_state);

    ## A value of 1 means: has for="" but no labelable descendant was
    ## seen.  Only in that case is the outer value put back.
    if ($self->{flag}->{has_labelable} == 1) {
      $self->{flag}->{has_labelable}
          = $element_state->{has_labelable_original};
    }
    if (not $element_state->{has_label_original}) {
      delete $self->{flag}->{has_label};
    }
    ## TODO: Warn if no labelable descendant? <input type=hidden>?

    ## NOTE: |<label for=a><input id=a></label>| is non-conforming.

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
  ## TODO: Tests for <nest/> in <label>
};
6308    
## Checker definition for the |select| element.  Only |option| and
## |optgroup| element children are accepted, and significant text is
## rejected.
$Element->{$HTML_NS}->{select} = {
  %HTMLChecker,
  ## ISSUE: HTML5 has no requirement like these:
  ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
  ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->(
    ## First argument: attribute name => value checker.
    {
      autofocus => $AutofocusAttrChecker,
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      form => $HTMLFormAttrChecker,
      multiple => $GetHTMLBooleanAttrChecker->('multiple'),
      name => $FormControlNameAttrChecker,
      ## TODO: tests for on*
      onformchange => $HTMLEventHandlerAttrChecker,
      onforminput => $HTMLEventHandlerAttrChecker,
      oninput => $HTMLEventHandlerAttrChecker,
      oninvalid => $HTMLEventHandlerAttrChecker,
      ## The callback accepts only values greater than zero.
      size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    },
    ## Second argument: attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey    => FEATURE_HTML5_FD | FEATURE_WF2,
      autofocus    => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      data         => FEATURE_WF2,
      datafld      => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc      => FEATURE_HTML4_REC_RESERVED,
      disabled     => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      form         => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      multiple     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      name         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onchange     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onformchange => FEATURE_WF2_INFORMATIVE,
      onforminput  => FEATURE_WF2_INFORMATIVE,
      onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      oninput      => FEATURE_WF2,
      oninvalid    => FEATURE_WF2,
      sdaform      => FEATURE_HTML20_RFC,
      sdapref      => FEATURE_HTML20_RFC,
      size         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    ## Start-of-element hook.  Arguments: the checker object, the
    ## item being checked and the per-element state hash.
    my ($self, $item, $element_state) = @_;

    ## An enclosing |label| that already has a labelable association
    ## recorded makes this control an additional one.
    my $label_already_used = ($self->{flag}->{has_label} and
                              $self->{flag}->{has_labelable});
    if ($label_already_used) {
      $self->{onerror}->(
        node  => $item->{node},
        type  => 'multiple labelable fae',
        level => $self->{level}->{must},
      );
    } else {
      ## Record that a labelable control has been seen.
      $self->{flag}->{has_labelable} = 2;
    }

    ## All URI-valued attributes here refer to resources.
    for my $uri_attr (qw(data datasrc template ref)) {
      $element_state->{uri_info}->{$uri_attr}->{type}->{resource} = 1;
    }

    ## IDs declared on this element are of the "labelable" kind.
    $element_state->{id_type} = 'labelable';
  },
  check_child_element => sub {
    ## NOTE: (option | optgroup)*

    ## Child element hook.  Arguments: the checker object, the parent
    ## item, the child element node, the child's namespace URI and
    ## local name, the child's transparency flag and the parent's
    ## state hash.
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Excluded by an ancestor.
      $self->{onerror}->(
        node  => $child_el,
        type  => 'element not allowed:minus',
        level => $self->{level}->{must},
      );
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included by an ancestor: accepted.
    } elsif ($child_nsuri eq $HTML_NS and
             ($child_ln eq 'option' or $child_ln eq 'optgroup')) {
      ## The only children the content model allows: accepted.
    } else {
      $self->{onerror}->(
        node  => $child_el,
        type  => 'element not allowed',
        level => $self->{level}->{must},
      );
    }
  },
  check_child_text => sub {
    ## Child text hook: significant text is reported as an error.
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(
        node  => $child_node,
        type  => 'character not allowed',
        level => $self->{level}->{must},
      );
    }
  },
};
6402 wakaba 1.1
## Checker definition for the |datalist| element.  The content model
## is either phrasing content or a sequence of |option| elements.
## Which one applies is decided by the first accepted child and is
## tracked in |$element_state->{phase}|.
$Element->{$HTML_NS}->{datalist} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->(
    ## First argument: attribute name => value checker.
    {
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    },
    ## Second argument: attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      data => FEATURE_WF2,
    },
  ),
  check_start => sub {
    ## Start-of-element hook.  Arguments: the checker object, the
    ## item being checked and the per-element state hash.
    my ($self, $item, $element_state) = @_;

    $element_state->{phase} = 'any'; # any | phrasing | option

    for my $uri_attr (qw(data template ref)) {
      $element_state->{uri_info}->{$uri_attr}->{type}->{resource} = 1;
    }

    ## IDs declared on this element are of the "datalist" kind.
    $element_state->{id_type} = 'datalist';
  },
  ## NOTE: phrasing | option*
  check_child_element => sub {
    ## Child element hook.  Arguments: the checker object, the parent
    ## item, the child element node, the child's namespace URI and
    ## local name, the child's transparency flag and the parent's
    ## state hash.
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $phase = $element_state->{phase};

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Excluded by an ancestor.
      $self->{onerror}->(
        node  => $child_el,
        type  => 'element not allowed:minus',
        level => $self->{level}->{must},
      );
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included by an ancestor: accepted.
    } elsif ($phase eq 'phrasing') {
      ## Already committed to phrasing content.
      unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(
          node  => $child_el,
          type  => 'element not allowed:phrasing',
          level => $self->{level}->{must},
        );
      }
    } elsif ($phase eq 'option') {
      ## Already committed to |option| children.
      unless ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $self->{onerror}->(
          node  => $child_el,
          type  => 'element not allowed',
          level => $self->{level}->{must},
        );
      }
    } elsif ($phase eq 'any') {
      ## The first accepted child picks the content model.  A rejected
      ## child leaves the phase undecided.
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{phase} = 'phrasing';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $element_state->{phase} = 'option';
      } else {
        $self->{onerror}->(
          node  => $child_el,
          type  => 'element not allowed',
          level => $self->{level}->{must},
        );
      }
    } else {
      die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    ## Child text hook.  Significant text commits an undecided
    ## element to phrasing content and is an error in the |option|
    ## phase.
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant and $element_state->{phase} ne 'phrasing') {
      if ($element_state->{phase} eq 'any') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(
          node  => $child_node,
          type  => 'character not allowed',
          level => $self->{level}->{must},
        );
      }
    }
  },
  check_end => sub {
    ## End-of-element hook.  It takes the same arguments as
    ## |check_start|.
    my ($self, $item, $element_state) = @_;
    if ($element_state->{phase} ne 'phrasing') {
      ## NOTE: Since the content model explicitly allows a |datalist|
      ## element being empty, we don't raise "no significant content"
      ## error for this element when there is no element.  (We should
      ## raise an error for |<datalist><br></datalist>|, however.)
      ## NOTE: As a side-effect, when the |datalist| element only
      ## contains non-conforming content, then the |phase| flag has not
      ## changed from |any|, no "no significant content" error is
      ## raised neither.
      $HTMLChecker{check_end}->(@_);
    } elsif ($element_state->{has_significant}) {
      ## Propagate "has significant content" to the real parent.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(
        node  => $item->{node},
        type  => 'no significant content',
        level => $self->{level}->{should},
      );
    }
  },
};
6502 wakaba 1.49
## Checker definition for the |optgroup| element.  The |label|
## attribute is required, only |option| element children are accepted
## and significant text is rejected.
$Element->{$HTML_NS}->{optgroup} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## First argument: attribute name => value checker.
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      label => sub {},
    },
    ## Second argument: attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      label    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_attrs2 => sub {
    ## Element-level attribute check: reports a missing |label|
    ## attribute.  Arguments: the checker object, the item being
    ## checked and the per-element state hash.
    my ($self, $item, $element_state) = @_;

    if (not $item->{node}->has_attribute_ns(undef, 'label')) {
      $self->{onerror}->(
        node  => $item->{node},
        type  => 'attribute missing',
        text  => 'label',
        level => $self->{level}->{must},
      );
    }
  },
  check_child_element => sub {
    ## Child element hook.  Arguments: the checker object, the parent
    ## item, the child element node, the child's namespace URI and
    ## local name, the child's transparency flag and the parent's
    ## state hash.
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Excluded by an ancestor.
      $self->{onerror}->(
        node  => $child_el,
        type  => 'element not allowed:minus',
        level => $self->{level}->{must},
      );
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included by an ancestor: accepted.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
      ## The only child the content model allows: accepted.
    } else {
      $self->{onerror}->(
        node  => $child_el,
        type  => 'element not allowed',
        level => $self->{level}->{must},
      );
    }
  },
  check_child_text => sub {
    ## Child text hook: significant text is reported as an error.
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(
        node  => $child_node,
        type  => 'character not allowed',
        level => $self->{level}->{must},
      );
    }
  },
};
6551    
## Checker definition for the |option| element.  The content model is
## inherited from |%HTMLTextChecker|; this entry only adds the
## attribute checkers and the attribute status table.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## First argument: attribute name => value checker.
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      label => sub {}, ## NOTE: No restriction.
      selected => $GetHTMLBooleanAttrChecker->('selected'), ## ISSUE: Not a "boolean attribute"
      value => sub {}, ## NOTE: No restriction.
    },
    ## Second argument: attribute name => feature status bits.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      label    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform  => FEATURE_HTML20_RFC,
      sdapref  => FEATURE_HTML20_RFC,
      selected => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      value    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
};
6572 wakaba 1.49
6573 wakaba 1.52 $Element->{$HTML_NS}->{textarea} = {
6574     %HTMLTextChecker,
6575 wakaba 1.121 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6576 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6577 wakaba 1.164 accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
6578 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6579 wakaba 1.164 cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6580 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6581 wakaba 1.136 form => $HTMLFormAttrChecker,
6582 wakaba 1.56 ## TODO: inputmode [WF2]
6583 wakaba 1.164 maxlength => sub {
6584     my ($self, $attr, $item, $element_state) = @_;
6585    
6586     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
6587    
6588 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
6589 wakaba 1.164 ## NOTE: Applying the rules for parsing non-negative integers
6590     ## results in a number.
6591     my $max_allowed_value_length = 0+$1;
6592    
6593     ## ISSUE: "The the purposes of this requirement," (typo)
6594    
6595     ## ISSUE: This constraint is applied w/o CRLF normalization to
6596     ## |value| attribute, but w/ CRLF normalization to
6597     ## concept-value.
6598     my $value = $item->{node}->text_content;
6599     if (defined $value) {
6600     my $codepoint_length = length $value;
6601    
6602     if ($codepoint_length > $max_allowed_value_length) {
6603     $self->{onerror}->(node => $item->{node},
6604     type => 'value too long',
6605     level => $self->{level}->{must});
6606     }
6607     }
6608     }
6609     },
6610 wakaba 1.165 name => $FormControlNameAttrChecker,
6611 wakaba 1.164 onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
6612     onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6613     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6614 wakaba 1.161 pattern => $PatternAttrChecker,
6615 wakaba 1.179 placeholder => $PlaceholderAttrChecker,
6616 wakaba 1.52 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6617 wakaba 1.56 required => $GetHTMLBooleanAttrChecker->('required'),
6618 wakaba 1.164 rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6619     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6620     oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
6621 wakaba 1.161 ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
6622 wakaba 1.56 wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
6623 wakaba 1.52 }, {
6624     %HTMLAttrStatus,
6625     %HTMLM12NCommonAttrStatus,
6626 wakaba 1.164 accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6627 wakaba 1.61 'accept-charset' => FEATURE_HTML2X_RFC,
6628 wakaba 1.176 accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
6629 wakaba 1.121 autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6630     cols => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6631 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6632 wakaba 1.49 dataformatas => FEATURE_HTML4_REC_RESERVED,
6633     datasrc => FEATURE_HTML4_REC_RESERVED,
6634 wakaba 1.121 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6635     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6636 wakaba 1.164 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
6637 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6638 wakaba 1.121 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6639     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6640 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6641     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6642     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6643 wakaba 1.164 onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6644     onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6645     oninput => FEATURE_WF2, ## TODO: tests
6646     oninvalid => FEATURE_WF2, ## TODO: tests
6647 wakaba 1.52 onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6648 wakaba 1.161 pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6649 wakaba 1.179 placeholder => FEATURE_HTML5_LC,
6650 wakaba 1.121 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6651     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6652     rows => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6653 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6654     sdapref => FEATURE_HTML20_RFC,
6655 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6656 wakaba 1.121 wrap => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6657 wakaba 1.52 }),
6658 wakaba 1.66 check_start => sub {
6659     my ($self, $item, $element_state) = @_;
6660 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6661     $self->{onerror}->(node => $item->{node},
6662     type => 'multiple labelable fae',
6663     level => $self->{level}->{must});
6664     } else {
6665     $self->{flag}->{has_labelable} = 2;
6666     }
6667 wakaba 1.164
6668     $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6669     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6670     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6671    
6672     $element_state->{id_type} = 'labelable';
6673     },
6674     check_attrs2 => sub {
6675     my ($self, $item, $element_state) = @_;
6676 wakaba 1.66
6677 wakaba 1.161 if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6678     not $item->{node}->has_attribute_ns (undef, 'title')) {
6679     ## NOTE: WF2 (dropped by HTML5)
6680     $self->{onerror}->(node => $item->{node},
6681     type => 'attribute missing',
6682     text => 'title',
6683     level => $self->{level}->{should});
6684     }
6685    
6686 wakaba 1.164 unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
6687     my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
6688     if (defined $wrap) {
6689     $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
6690     if ($wrap eq 'hard') {
6691     $self->{onerror}->(node => $item->{node},
6692     type => 'attribute missing',
6693     text => 'cols',
6694     level => $self->{level}->{must});
6695     }
6696     }
6697     }
6698 wakaba 1.66 },
6699 wakaba 1.52 };
6700 wakaba 1.49
## The |output| element: phrasing content with the |for|, |form| and
## |name| attributes plus the WF2 form event handler attributes.
$Element->{$HTML_NS}->{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |for|: an unordered set of unique space-separated tokens.  The
    ## first occurrence of each token is queued in |$self->{idref}| as
    ## an ID reference of type |any|; every repeated occurrence is
    ## reported as a |duplicate token| error.
    for => sub {
      my ($self, $attr) = @_;

      my @tokens = grep { $_ ne '' }
          split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

      my %seen;
      for my $token (@tokens) {
        if ($seen{$token}++) {
          $self->{onerror}->(node => $attr, type => 'duplicate token',
                             value => $token,
                             level => $self->{level}->{must});
          next;
        }

        push @{$self->{idref}}, ['any', $token, $attr];
      }
    },
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
  }, {
    %HTMLAttrStatus,
    for => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  }),
};
6737    
## The |isindex| element: an empty element that is deprecated in HTML4
## (reported at the "deprecated, SHOULD" level).
$Element->{$HTML_NS}->{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->({
    prompt => sub {}, ## NOTE: Text [M12N]
  }, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record how URI-valued attributes of this element are used:
    ## |action| as an action, |template| and |ref| as resources.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{action}->{type}->{action} = 1;
    $uri_info->{$_}->{type}->{resource} = 1 for qw(template ref);
  },
};
6765 wakaba 1.49
## The |script| element.
##
## With |src| the element must be empty.  Without |src| the inline text
## is collected in |$element_state->{text}| and, at the end of the
## element, handed to |$self->{onsubdoc}| together with the script's
## media type -- unless that type looks like XML, in which case only an
## "uncertain" level |XML script lang| error is reported.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    charset => sub {
      my ($self, $attr) = @_;

      ## |charset| is only allowed together with |src|.
      unless ($attr->owner_element->has_attribute_ns (undef, 'src')) {
        $self->{onerror}->(type => 'attribute not allowed',
                           node => $attr,
                           level => $self->{level}->{must});
      }

      $HTMLCharsetChecker->($attr->value, @_);
    },
    language => sub {}, ## NOTE: No syntax constraint according to HTML4.
    src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
  }, {
    %HTMLAttrStatus,
    async => FEATURE_HTML5_WD,
    charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    href => FEATURE_RDFA_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    if ($item->{node}->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $item->{node}->get_attribute_ns (undef, 'type');
      my $language = $item->{node}->get_attribute_ns (undef, 'language');

      ## Determine the script block's type.  |type| always takes
      ## precedence over |language|: an empty |language| only selects
      ## the JavaScript default when there is no |type| attribute at
      ## all.
      if (defined $type) {
        $type = 'text/javascript' if $type eq '';
      } elsif (defined $language) {
        $type = $language eq '' ? 'text/javascript' : 'text/' . $language;
      } else {
        $type = 'text/javascript';
      }

      ## Reduce a syntactically valid media type to lowercase
      ## "type/subtype"; anything else is kept as is.
      if ($type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
        $type = "$1/$2";
        $type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
        ## TODO: Though we strip parameters here, they should not be
        ## ignored for the purpose of conformance checking...
      }
      $element_state->{script_type} = $type;
    }

    $element_state->{uri_info}->{src}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;

    ## Buffer for the inline script text (see |check_child_text|).
    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      if ($element_state->{must_be_empty}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:empty',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant and
        $element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed:empty',
                         level => $self->{level}->{must});
    }
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{must_be_empty}) {
      if ($element_state->{script_type} =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML script lang',
                           text => $element_state->{script_type},
                           level => $self->{level}->{uncertain});
        ## ISSUE: Should we raise some kind of error for
        ## <script type="text/xml">aaaaa</script>?
        ## NOTE: ^^^ This is why we throw an "uncertain" error.
      } else {
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $element_state->{script_type},
                             is_char_string => 1});
      }

      $HTMLChecker{check_end}->(@_);
    }
  },
  ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
  ## NOTE: "When used to include script data, the script data must be embedded
  ## inline, the format of the data must be given using the type attribute,
  ## and the src attribute must not be specified." - not testable.
  ## TODO: It would be possible to err <script type=text/plain src=...>
};
6890 wakaba 1.25 ## ISSUE: Significant check and text child node
6891 wakaba 1.1
6892     ## NOTE: When script is disabled.
## The |noscript| element.
##
## Behaviour depends on |$self->{flag}->{in_head}|: inside |head| only
## |link|, unscoped |style| and |meta| with an |http-equiv| other than
## |content-type| are accepted and significant text is an error;
## elsewhere the element delegates to the transparent checker and
## forbids nested |noscript|.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    ## |noscript| is not allowed in XML documents.
    $self->{onerror}->(node => $node, type => 'in XML:noscript',
                       level => $self->{level}->{must})
        unless $node->owner_document->manakai_is_html;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {noscript => 1}})
        unless $self->{flag}->{in_head};

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    return $HTMLTransparentChecker{check_child_element}->(@_)
        unless $self->{flag}->{in_head};

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }

    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## Decide whether the child is one of the elements permitted in a
    ## |noscript| within |head|.
    my $allowed = 0;
    if ($child_nsuri eq $HTML_NS) {
      if ($child_ln eq 'link') {
        $allowed = 1;
      } elsif ($child_ln eq 'style') {
        $allowed = 1 unless $child_el->has_attribute_ns (undef, 'scoped');
      } elsif ($child_ln eq 'meta') {
        my $http_equiv_attr
            = $child_el->get_attribute_node_ns (undef, 'http-equiv');
        ## TODO: case
        $allowed = 1
            if $http_equiv_attr and
               lc ($http_equiv_attr->value) ne 'content-type';
      }
    }

    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:head noscript',
                       level => $self->{level}->{must})
        unless $allowed;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    return $HTMLTransparentChecker{check_child_text}->(@_)
        unless $self->{flag}->{in_head};

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $base = $self->{flag}->{in_head}
        ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
    $base->{check_end}->(@_);
  },
};
6984 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
6985 wakaba 1.1
6986     $Element->{$HTML_NS}->{'event-source'} = {
6987 wakaba 1.40 %HTMLEmptyChecker,
6988 wakaba 1.118 status => FEATURE_HTML5_LC_DROPPED,
6989     check_attrs => $GetHTMLAttrsChecker->({
6990     src => $HTMLURIAttrChecker,
6991     }, {
6992     %HTMLAttrStatus,
6993     src => FEATURE_HTML5_LC_DROPPED,
6994     }),
6995     check_start => sub {
6996     my ($self, $item, $element_state) = @_;
6997    
6998     $element_state->{uri_info}->{src}->{type}->{resource} = 1;
6999     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
7000     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
7001     },
7002     };
7003    
7004     $Element->{$HTML_NS}->{eventsource} = {
7005     %HTMLEmptyChecker,
7006 wakaba 1.180 status => FEATURE_HTML5_DROPPED,
7007 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
7008 wakaba 1.1 src => $HTMLURIAttrChecker,
7009 wakaba 1.50 }, {
7010     %HTMLAttrStatus,
7011 wakaba 1.180 src => FEATURE_HTML5_DROPPED,
7012 wakaba 1.1 }),
7013 wakaba 1.66 check_start => sub {
7014     my ($self, $item, $element_state) = @_;
7015    
7016     $element_state->{uri_info}->{src}->{type}->{resource} = 1;
7017 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
7018     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
7019 wakaba 1.66 },
7020 wakaba 1.1 };
7021    
7022     $Element->{$HTML_NS}->{details} = {
7023 wakaba 1.134 %{$Element->{$HTML_NS}->{fieldset}},
7024 wakaba 1.153 status => FEATURE_HTML5_LC,
7025 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
7026 wakaba 1.1 open => $GetHTMLBooleanAttrChecker->('open'),
7027 wakaba 1.50 }, {
7028     %HTMLAttrStatus,
7029 wakaba 1.153 open => FEATURE_HTML5_LC,
7030 wakaba 1.1 }),
7031     };
7032    
7033     $Element->{$HTML_NS}->{datagrid} = {
7034 wakaba 1.72 %HTMLFlowContentChecker,
7035 wakaba 1.48 status => FEATURE_HTML5_WD,
7036 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
7037 wakaba 1.1 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
7038     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
7039 wakaba 1.50 }, {
7040     %HTMLAttrStatus,
7041     disabled => FEATURE_HTML5_WD,
7042     multiple => FEATURE_HTML5_WD,
7043 wakaba 1.1 }),
7044 wakaba 1.40 check_start => sub {
7045     my ($self, $item, $element_state) = @_;
7046 wakaba 1.1
7047 wakaba 1.40 $self->_add_minus_elements ($element_state,
7048     {$HTML_NS => {a => 1, datagrid => 1}});
7049 wakaba 1.172
7050 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
7051     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
7052 wakaba 1.40 },
7053     check_end => sub {
7054     my ($self, $item, $element_state) = @_;
7055     $self->_remove_minus_elements ($element_state);
7056 wakaba 1.1
7057 wakaba 1.172 $HTMLFlowContentChecker{check_end}->(@_);
7058 wakaba 1.40 },
7059 wakaba 1.1 };
7060    
7061     $Element->{$HTML_NS}->{command} = {
7062 wakaba 1.40 %HTMLEmptyChecker,
7063 wakaba 1.48 status => FEATURE_HTML5_WD,
7064 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
7065 wakaba 1.1 checked => $GetHTMLBooleanAttrChecker->('checked'),
7066     default => $GetHTMLBooleanAttrChecker->('default'),
7067     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
7068     icon => $HTMLURIAttrChecker,
7069     label => sub { }, ## NOTE: No conformance creteria
7070     radiogroup => sub { }, ## NOTE: No conformance creteria
7071     type => sub {
7072     my ($self, $attr) = @_;
7073     my $value = $attr->value;
7074     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
7075 wakaba 1.104 $self->{onerror}->(node => $attr, type => 'invalid attribute value',
7076     level => $self->{level}->{must});
7077 wakaba 1.1 }
7078     },
7079 wakaba 1.50 }, {
7080     %HTMLAttrStatus,
7081     checked => FEATURE_HTML5_WD,
7082 wakaba 1.175 default => FEATURE_HTML5_DROPPED, # HTML5 revision 3067
7083 wakaba 1.50 disabled => FEATURE_HTML5_WD,
7084     icon => FEATURE_HTML5_WD,
7085     label => FEATURE_HTML5_WD,
7086     radiogroup => FEATURE_HTML5_WD,
7087     type => FEATURE_HTML5_WD,
7088 wakaba 1.1 }),
7089 wakaba 1.66 check_start => sub {
7090     my ($self, $item, $element_state) = @_;
7091    
7092     $element_state->{uri_info}->{icon}->{type}->{embedded} = 1;
7093 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
7094     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
7095 wakaba 1.66 },
7096 wakaba 1.115 };
7097    
7098     $Element->{$HTML_NS}->{bb} = {
7099     %HTMLPhrasingContentChecker,
7100 wakaba 1.153 status => FEATURE_HTML5_WD,
7101 wakaba 1.115 check_attrs => $GetHTMLAttrsChecker->({
7102     type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1}),
7103     }, {
7104     %HTMLAttrStatus,
7105 wakaba 1.153 type => FEATURE_HTML5_WD,
7106 wakaba 1.115 }),
7107 wakaba 1.130 check_start => sub {
7108     my ($self, $item, $element_state) = @_;
7109     $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
7110    
7111     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
7112     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
7113     },
7114     check_end => sub {
7115     my ($self, $item, $element_state) = @_;
7116     $self->_remove_minus_elements ($element_state);
7117    
7118     $HTMLTransparentChecker{check_end}->(@_);
7119     },
7120 wakaba 1.1 };
7121    
7122     $Element->{$HTML_NS}->{menu} = {
7123 wakaba 1.40 %HTMLPhrasingContentChecker,
7124 wakaba 1.54 #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
7125     status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
7126     ## NOTE: We don't want any |menu| element warned as deprecated.
7127 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
7128 wakaba 1.1 autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
7129 wakaba 1.68 compact => $GetHTMLBooleanAttrChecker->('compact'),
7130 wakaba 1.135 ## ISSUE: <menu id=""><p contextmenu=""> match? (In the current
7131     ## implementation, it does not match.)
7132 wakaba 1.1 label => sub { }, ## NOTE: No conformance creteria
7133     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
7134 wakaba 1.49 }, {
7135     %HTMLAttrStatus,
7136     %HTMLM12NCommonAttrStatus,
7137 wakaba 1.61 align => FEATURE_HTML2X_RFC,
7138 wakaba 1.113 autosubmit => FEATURE_HTML5_DROPPED,
7139 wakaba 1.49 compat => FEATURE_M12N10_REC_DEPRECATED,
7140 wakaba 1.50 label => FEATURE_HTML5_WD,
7141 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
7142 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
7143     sdapref => FEATURE_HTML20_RFC,
7144 wakaba 1.50 type => FEATURE_HTML5_WD,
7145 wakaba 1.1 }),
7146 wakaba 1.40 check_start => sub {
7147     my ($self, $item, $element_state) = @_;
7148     $element_state->{phase} = 'li or phrasing';
7149     $element_state->{in_menu_original} = $self->{flag}->{in_menu};
7150     $self->{flag}->{in_menu} = 1;
7151 wakaba 1.79
7152     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
7153     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
7154 wakaba 1.135 $element_state->{id_type} = 'menu';
7155 wakaba 1.40 },
7156     check_child_element => sub {
7157     my ($self, $item, $child_el, $child_nsuri, $child_ln,
7158     $child_is_transparent, $element_state) = @_;
7159 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
7160     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
7161 wakaba 1.40 $self->{onerror}->(node => $child_el,
7162     type => 'element not allowed:minus',
7163 wakaba 1.104 level => $self->{level}->{must});
7164 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
7165     #
7166     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
7167     if ($element_state->{phase} eq 'li') {
7168     #
7169     } elsif ($element_state->{phase} eq 'li or phrasing') {
7170     $element_state->{phase} = 'li';
7171     } else {
7172 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
7173     level => $self->{level}->{must});
7174 wakaba 1.40 }
7175     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
7176     if ($element_state->{phase} eq 'phrasing') {
7177     #
7178     } elsif ($element_state->{phase} eq 'li or phrasing') {
7179     $element_state->{phase} = 'phrasing';
7180     } else {
7181 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
7182     level => $self->{level}->{must});
7183 wakaba 1.40 }
7184     } else {
7185 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
7186     level => $self->{level}->{must});
7187 wakaba 1.40 }
7188     },
7189     check_child_text => sub {
7190     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
7191     if ($has_significant) {
7192     if ($element_state->{phase} eq 'phrasing') {
7193     #
7194     } elsif ($element_state->{phase} eq 'li or phrasing') {
7195     $element_state->{phase} = 'phrasing';
7196     } else {
7197     $self->{onerror}->(node => $child_node,
7198 wakaba 1.104 type => 'character not allowed',
7199     level => $self->{level}->{must});
7200 wakaba 1.1 }
7201     }
7202 wakaba 1.40 },
7203     check_end => sub {
7204     my ($self, $item, $element_state) = @_;
7205     delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};
7206    
7207     if ($element_state->{phase} eq 'li') {
7208     $HTMLChecker{check_end}->(@_);
7209     } else { # 'phrasing' or 'li or phrasing'
7210     $HTMLPhrasingContentChecker{check_end}->(@_);
7211 wakaba 1.1 }
7212     },
7213 wakaba 1.8 };
7214    
7215     $Element->{$HTML_NS}->{datatemplate} = {
7216 wakaba 1.40 %HTMLChecker,
7217 wakaba 1.48 status => FEATURE_HTML5_AT_RISK,
7218 wakaba 1.40 check_child_element => sub {
7219     my ($self, $item, $child_el, $child_nsuri, $child_ln,
7220     $child_is_transparent, $element_state) = @_;
7221 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
7222     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
7223 wakaba 1.40 $self->{onerror}->(node => $child_el,
7224     type => 'element not allowed:minus',
7225 wakaba 1.104 level => $self->{level}->{must});
7226 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
7227     #
7228     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'rule') {
7229     #
7230     } else {
7231     $self->{onerror}->(node => $child_el,
7232 wakaba 1.104 type => 'element not allowed:datatemplate',
7233     level => $self->{level}->{must});
7234 wakaba 1.40 }
7235     },
7236     check_child_text => sub {
7237     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
7238     if ($has_significant) {
7239 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
7240     level => $self->{level}->{must});
7241 wakaba 1.8 }
7242     },
7243     is_xml_root => 1,
7244     };
7245    
7246     $Element->{$HTML_NS}->{rule} = {
7247 wakaba 1.40 %HTMLChecker,
7248 wakaba 1.48 status => FEATURE_HTML5_AT_RISK,
7249 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
7250 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
7251 wakaba 1.92 mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
7252 wakaba 1.50 }, {
7253     %HTMLAttrStatus,
7254     condition => FEATURE_HTML5_AT_RISK,
7255     mode => FEATURE_HTML5_AT_RISK,
7256 wakaba 1.8 }),
7257 wakaba 1.40 check_start => sub {
7258     my ($self, $item, $element_state) = @_;
7259 wakaba 1.79
7260 wakaba 1.40 $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
7261 wakaba 1.79 $element_state->{in_rule_original} = $self->{flag}->{in_rule};
7262     $self->{flag}->{in_rule} = 1;
7263    
7264     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
7265     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
7266 wakaba 1.40 },
7267     check_child_element => sub { },
7268     check_child_text => sub { },
7269     check_end => sub {
7270     my ($self, $item, $element_state) = @_;
7271 wakaba 1.79
7272 wakaba 1.40 $self->_remove_plus_elements ($element_state);
7273 wakaba 1.79 delete $self->{flag}->{in_rule} unless $element_state->{in_rule_original};
7274    
7275 wakaba 1.40 $HTMLChecker{check_end}->(@_);
7276 wakaba 1.8 },
7277     ## NOTE: "MAY be anything that, when the parent |datatemplate|
7278     ## is applied to some conforming data, results in a conforming DOM tree.":
7279     ## We don't check against this.
7280     };
7281    
7282     $Element->{$HTML_NS}->{nest} = {
7283 wakaba 1.40 %HTMLEmptyChecker,
7284 wakaba 1.48 status => FEATURE_HTML5_AT_RISK,
7285 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
7286 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
7287     mode => sub {
7288     my ($self, $attr) = @_;
7289     my $value = $attr->value;
7290 wakaba 1.132 if ($value !~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/) {
7291 wakaba 1.104 $self->{onerror}->(node => $attr, type => 'mode:syntax error',
7292     level => $self->{level}->{must});
7293 wakaba 1.23 }
7294     },
7295 wakaba 1.50 }, {
7296     %HTMLAttrStatus,
7297     filter => FEATURE_HTML5_AT_RISK,
7298     mode => FEATURE_HTML5_AT_RISK,
7299 wakaba 1.8 }),
7300 wakaba 1.1 };
7301    
7302     $Element->{$HTML_NS}->{legend} = {
7303 wakaba 1.40 %HTMLPhrasingContentChecker,
7304 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
7305 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
7306     # align => $GetHTMLEnumeratedAttrChecker->({
7307     # top => 1, bottom => 1, left => 1, right => 1,
7308     # }),
7309 wakaba 1.167 form => $HTMLFormAttrChecker,
7310 wakaba 1.52 }, {
7311 wakaba 1.49 %HTMLAttrStatus,
7312     %HTMLM12NCommonAttrStatus,
7313 wakaba 1.176 accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
7314 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
7315 wakaba 1.167 form => FEATURE_HTML5_DROPPED,
7316 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
7317 wakaba 1.49 }),
7318 wakaba 1.170 check_child_element => sub {
7319     my ($self, $item, $child_el, $child_nsuri, $child_ln,
7320     $child_is_transparent, $element_state) = @_;
7321     if ($item->{parent_state}->{in_figure}) {
7322     $HTMLFlowContentChecker{check_child_element}->(@_);
7323     } else {
7324     $HTMLPhrasingContentChecker{check_child_element}->(@_);
7325     }
7326     },
7327     check_child_text => sub {
7328     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
7329     if ($item->{parent_state}->{in_figure}) {
7330     $HTMLFlowContentChecker{check_child_text}->(@_);
7331     } else {
7332     $HTMLPhrasingContentChecker{check_child_text}->(@_);
7333     }
7334     },
7335     check_start => sub {
7336     my ($self, $item, $element_state) = @_;
7337     $self->_add_minus_elements ($element_state, {$HTML_NS => {figure => 1}});
7338    
7339     $HTMLFlowContentChecker{check_start}->(@_);
7340     },
7341     check_end => sub {
7342     my ($self, $item, $element_state) = @_;
7343     $self->_remove_minus_elements ($element_state);
7344    
7345     $HTMLFlowContentChecker{check_end}->(@_);
7346     },
7347     }; # legend
7348 wakaba 1.1
7349     $Element->{$HTML_NS}->{div} = {
7350 wakaba 1.72 %HTMLFlowContentChecker,
7351 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
7352 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
7353     align => $GetHTMLEnumeratedAttrChecker->({
7354     left => 1, center => 1, right => 1, justify => 1,
7355     }),
7356     }, {
7357 wakaba 1.49 %HTMLAttrStatus,
7358 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
7359 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
7360     datafld => FEATURE_HTML4_REC_RESERVED,
7361     dataformatas => FEATURE_HTML4_REC_RESERVED,
7362     datasrc => FEATURE_HTML4_REC_RESERVED,
7363 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
7364 wakaba 1.49 }),
7365 wakaba 1.66 check_start => sub {
7366     my ($self, $item, $element_state) = @_;
7367    
7368     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
7369 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
7370     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
7371 wakaba 1.66 },
7372 wakaba 1.1 };
7373    
## Checker definition for the HTML |center| element, registered in
## |$Element| under the HTML namespace.
##
## All behaviour comes from |%HTMLFlowContentChecker|; only |status| and
## |check_attrs| are set here.  The element's status is
## FEATURE_M12N10_REC_DEPRECATED only (no HTML5 flag is or-ed in).
7374 wakaba 1.64 $Element->{$HTML_NS}->{center} = {
7375 wakaba 1.72 %HTMLFlowContentChecker,
7376 wakaba 1.64 status => FEATURE_M12N10_REC_DEPRECATED,
## The first argument is an empty hash: no element-specific attribute
## checkers are supplied.  The second argument lists attribute status
## flags; |lang| overrides whatever the two shared tables provide.
7377     check_attrs => $GetHTMLAttrsChecker->({}, {
7378     %HTMLAttrStatus,
7379     %HTMLM12NCommonAttrStatus,
7380 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
7381 wakaba 1.64 }),
7382     };
7383    
## Checker definition for the HTML |font| element, registered in
## |$Element| under the HTML namespace.
##
## Content checking comes from |%HTMLTransparentChecker|.  The element is
## marked FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED.
7384 wakaba 1.1 $Element->{$HTML_NS}->{font} = {
7385 wakaba 1.40 %HTMLTransparentChecker,
7386 wakaba 1.78 status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
## No element-specific attribute value checkers are registered yet (see
## the TODO below); only the per-attribute status flags are listed.
7387 wakaba 1.70 check_attrs => $GetHTMLAttrsChecker->({
7388     ## TODO: HTML4 |size|, |color|, |face|
7389 wakaba 1.49 }, {
## Entries after |%HTMLAttrStatus| override same-named keys from it.
7390     %HTMLAttrStatus,
7391 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7392 wakaba 1.49 color => FEATURE_M12N10_REC_DEPRECATED,
7393 wakaba 1.153 dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7394 wakaba 1.49 face => FEATURE_M12N10_REC_DEPRECATED,
7395 wakaba 1.153 id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7396     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
7397 wakaba 1.49 size => FEATURE_M12N10_REC_DEPRECATED,
7398 wakaba 1.153 style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
7399     title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7400 wakaba 1.49 }),
7401 wakaba 1.78 ## NOTE: When the |font| element was defined in the HTML5 specification,
7402     ## it was allowed only in a document with the WYSIWYG signature. The
7403     ## checker does not check whether the signature is present, since the
7404     ## signature has been dropped, too, and has never been implemented. (In
7405     ## addition, for any |font| element an "element not defined" error is
7406     ## raised anyway, so we don't have to raise an additional error.)
7407 wakaba 1.1 };
7408 wakaba 1.49
## Checker definition for the HTML |basefont| element, registered in
## |$Element| under the HTML namespace.
##
## Content checking comes from |%HTMLEmptyChecker|.  The element's status
## is FEATURE_M12N10_REC_DEPRECATED only.
7409 wakaba 1.64 $Element->{$HTML_NS}->{basefont} = {
7410     %HTMLEmptyChecker,
7411     status => FEATURE_M12N10_REC_DEPRECATED,
## No attribute value checkers are registered yet (see the TODO below);
## only the per-attribute status flags are listed.
7412     check_attrs => $GetHTMLAttrsChecker->({
7413     ## TODO: color, face, size
7414     }, {
7415     %HTMLAttrStatus,
7416     color => FEATURE_M12N10_REC_DEPRECATED,
7417     face => FEATURE_M12N10_REC_DEPRECATED,
## The commented-out line is an earlier variant that marked |id| with
## the deprecated M12N flag; the active line uses FEATURE_M12N10_REC.
7418 wakaba 1.153 #id => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
7419     id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7420 wakaba 1.64 size => FEATURE_M12N10_REC_DEPRECATED,
7421     }),
7422     };
7423    
7424 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
7425     ## class title id cols rows onload onunload style(x10)
7426     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7427     ## noframes Common, lang(xhtml10)
7428    
7429 wakaba 1.100 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7430 wakaba 1.56
7431 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7432     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7433     ## xmp, listing sdapref [HTML 2.0]
7434    
7435 wakaba 1.56 =pod
7436    
7437 wakaba 1.61 HTML 2.0 nextid @n
7438    
7439     RFC 2659: CERTS CRYPTOPTS
7440    
7441     ISO-HTML: pre-html, divN
7442 wakaba 1.82
7443     XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7444     di (Common), nl (Common), handler (Common, type), standby (Common),
7445     summary (Common)
7446    
7447 wakaba 1.97 Access & XHTML2: access (LC)
7448 wakaba 1.82
7449     XML Events & XForms (for XHTML2 support; very, very low priority)
7450 wakaba 1.61
7451 wakaba 1.56 =cut
7452 wakaba 1.61
7453     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7454     ## We added them only to |a|. |link| and |form| might also allow them
7455     ## in theory.
7456 wakaba 1.1
## Set the |loaded| flag for the HTML namespace in
## |$Whatpm::ContentChecker::Namespace|.
## NOTE(review): presumably consulted by the core checker to avoid
## loading this module again -- confirm against Whatpm::ContentChecker.
7457     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
7458    
## A Perl module file must end with a true value so that |require|
## succeeds.
7459     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24