/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.176 - (hide annotations) (download)
Sun Jul 5 06:46:16 2009 UTC (16 years ago) by wakaba
Branch: MAIN
Changes since 1.175: +34 -46 lines
++ whatpm/t/dom-conformance/ChangeLog	5 Jul 2009 06:46:07 -0000
	* html-global-1.dat, html-form-input-1.dat: Now that |accesskey|
	attribute is allowed as a global attribute with new syntax
	definition, related test results are revised and some new tests are
	added (HTML5 revision 3065).

2009-07-05  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	5 Jul 2009 06:45:08 -0000
	* HTML.pm: Allow |accesskey| on all elements (HTML5 revision
	3065).

2009-07-05  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.117 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6    
7 wakaba 1.1 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8    
9 wakaba 1.174 ## --- Feature Status ---
10    
## HTML5 feature statuses.  Each constant is built from a
## |Whatpm::ContentChecker::FEATURE_STATUS_*| flag; all but the two
## "dropped" statuses also set |FEATURE_ALLOWED|.

## NOTE: Part of HTML5, the implemented status.
sub FEATURE_HTML5_COMPLETE () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}

## NOTE: Part of HTML5, the awaiting implementation feedback status.
sub FEATURE_HTML5_CR () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}

## NOTE: Part of HTML5, the last call of comments status.
sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}

## NOTE: Part of HTML5, but in the being considered for removal
## status.
sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}

## NOTE: Part of HTML5, the working draft status.
sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}

## NOTE: Part of HTML5, the first draft status.
sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}

## NOTE: Part of HTML5, but not annotated.
sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}

## NOTE: Was part of HTML5, in a status before the last call of
## comments, but then dropped.
sub FEATURE_HTML5_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_WD }

## NOTE: Was part of HTML5, in the last call of comments status, but
## then dropped.
sub FEATURE_HTML5_LC_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
57 wakaba 1.154
## Web Forms 2.0 (WF2) feature statuses.  All three carry the LC
## status flag only.

## NOTE: Defined in WF2 (whether deprecated or not) and then
## incorporated into the HTML5 spec.
sub FEATURE_WF2X () { Whatpm::ContentChecker::FEATURE_STATUS_LC }

## NOTE: Features introduced or modified in WF2, which were not
## merged into HTML5.
sub FEATURE_WF2 () { Whatpm::ContentChecker::FEATURE_STATUS_LC }

## NOTE: Features mentioned in WF2's informative appendix A, which
## were not merged into HTML5.
sub FEATURE_WF2_INFORMATIVE () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
73 wakaba 1.49
## RDFa feature statuses.

## NOTE: RDFa feature carrying the REC status flag.
sub FEATURE_RDFA_REC () { Whatpm::ContentChecker::FEATURE_STATUS_REC }

## NOTE: The feature that was defined in a RDFa last call working
## draft, but then dropped.
sub FEATURE_RDFA_LC_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
82 wakaba 1.58
## NOTE: The XHTML Role LCWD has almost no information on how the
## |role| attribute can be used - the only requirement in that regard
## is: "the attribute MUST be referenced using its namespace-qualified
## form" (and this is a host language conformance requirement!).
sub FEATURE_ROLE_LC () { Whatpm::ContentChecker::FEATURE_STATUS_LC }

## NOTE: XHTML 2.0 Editor's Draft, in which the namespace URI is
## "http://www.w3.org/1999/xhtml".
sub FEATURE_XHTML2_ED () { Whatpm::ContentChecker::FEATURE_STATUS_WD }
96 wakaba 1.58
## XHTML Basic 1.1 feature statuses.

## NOTE: XHTML Basic 1.1 Recommendation, new features (not in XHTML
## M12N).
sub FEATURE_XHTMLBASIC11_CR () { Whatpm::ContentChecker::FEATURE_STATUS_REC }

## NOTE: XHTML Basic 1.1 Recommendation, new but deprecated
## features.
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO
}
108    
## NOTE(review): the name says REC but the value is the CR status flag;
## FEATURE_XHTML10_REC in this file does the same - confirm that this
## is intended.
sub FEATURE_RUBY_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## NOTE: XHTML M12N 1.1 Recommendation, new features (not in 1.0).
sub FEATURE_M12N11_LC () { Whatpm::ContentChecker::FEATURE_STATUS_REC }
117    
## NOTE: M12N10 status is based on its abstract module definition.
## It contains a number of problems.  (However, again, it's a REC!)

## NOTE: Oh, XHTML m12n 1.0 passed the CR phase!  W3C Process sucks!
sub FEATURE_M12N10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_REC }

## NOTE: REC status flag combined with |FEATURE_DEPRECATED_INFO|.
sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO
}
128 wakaba 1.49
## NOTE: XHTML10 status is based on its transitional and frameset DTDs
## (second edition).  Only missing attributes from M12N10 abstract
## definition are added.
sub FEATURE_XHTML10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## NOTE: Diff from HTML4.  (Informative documentation.)
sub FEATURE_ISOHTML_PREPARATION () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: HTML4 status is based on its transitional and frameset DTDs
## (HTML 4.01).  Only missing attributes from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
146    
147     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
148     ## rather than presentational attributes (deprecated or not deprecated).
149 wakaba 1.48
## NOTE: Diff from HTML4.  The deprecation flag reflects a lowercase
## normative "should".
sub FEATURE_HTML32_REC_OBSOLETE () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD
}

## NOTE: Experimental RFC.
sub FEATURE_RFC2659 () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
## (Proposed Standard, obsolete.)
sub FEATURE_HTML2X_RFC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## NOTE: Diff from HTML 2.0.  (Experimental RFC, obsolete.)
sub FEATURE_RFC1942 () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## NOTE: Diff from HTML 3.2.  (Proposed Standard, obsolete.)
sub FEATURE_HTML20_RFC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
175 wakaba 1.58
176 wakaba 1.174 ## --- Content Model ---
177    
178 wakaba 1.29 ## December 2007 HTML5 Classification
179    
## Metadata content: namespace URI => {local name => 1}.
my $HTMLMetadataContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(title base link style script noscript),
      qw(event-source eventsource),
      qw(command datatemplate),
      ## NOTE: A |meta| with no |name| element is not allowed as
      ## a metadata content other than |head| element.
      qw(meta),
    ),
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
193    
## Flow content: namespace URI => {local name => 1}.
my $HTMLFlowContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(section nav article blockquote aside),
      qw(h1 h2 h3 h4 h5 h6 header),
      qw(footer address p hr dialog pre),
      qw(ol ul dl figure map table),
      qw(form fieldset),
      qw(details), ## ISSUE: "Flow element" in spec.
      qw(datagrid), ## ISSUE: "Flow element" in spec.
      qw(datatemplate),
      qw(div), ## ISSUE: No category in spec.
      ## NOTE: |style| is only allowed if |scoped| attribute is specified.
      ## Additionally, it must be before any other element or
      ## non-inter-element-whitespace text node.
      qw(style),

      qw(br q cite em strong small mark),
      qw(dfn abbr time progress meter code),
      qw(var samp kbd sub sup span i),
      qw(b bdo ruby),
      qw(script noscript event-source eventsource),
      qw(command bb),
      qw(input button label select datalist),
      qw(textarea output),
      ## ISSUE: |datagrid| (listed above) is also an "Interactive
      ## element" in the spec.
      ## NOTE: |area| is allowed only as a descendant of |map|.
      qw(area),

      ## NOTE: Transparent.
      qw(a ins del font),

      ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
      qw(menu),

      qw(img iframe embed object video audio),
      qw(canvas),
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
236    
## Sectioning content: namespace URI => {local name => 1}.
my $HTMLSectioningContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(section nav article aside),
      ## NOTE: |body| is only allowed in |html| element.
      qw(body),
    ),
  },
};
244    
## Sectioning roots: namespace URI => {local name => 1}.
my $HTMLSectioningRoot = {
  $HTML_NS => {
    map { ($_ => 1) } qw(blockquote datagrid figure td)
  },
};
250    
## Heading content: namespace URI => {local name => 1}.
my $HTMLHeadingContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw(h1 h2 h3 h4 h5 h6 header)
  },
};
256    
## Phrasing content: namespace URI => {local name => 1}.
## NOTE: All phrasing content is also flow content.
## NOTE: Non-inter-element-whitespace text nodes are phrasing content
## too, but they are not represented in this table.
my $HTMLPhrasingContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(br q cite em strong small mark),
      qw(dfn abbr time progress meter code),
      qw(var samp kbd sub sup span i),
      qw(b bdo ruby),
      qw(script noscript event-source eventsource),
      qw(command bb),
      qw(input button label select datalist),
      qw(textarea output),
      qw(datagrid), ## ISSUE: "Interactive element" in the spec.
      ## NOTE: |area| is allowed only as a descendant of |map|.
      qw(area),

      ## NOTE: Transparent.
      qw(a ins del font),

      ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
      qw(menu),

      qw(img iframe embed object video audio),
      qw(canvas),
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
288    
289 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
290 wakaba 1.29
## Interactive content: namespace URI => {local name => 1}.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(a),
      qw(label input button select textarea),
      qw(details datagrid bb),

      ## NOTE: When "controls" attribute is specified.
      qw(video audio),

      ## NOTE: When "type=toolbar" attribute is specified.
      qw(menu),
    ),
  },
};
304    
## NOTE: Labelable form-associated elements:
## namespace URI => {local name => 1}.
my $LabelableFAE = {
  $HTML_NS => {
    map { ($_ => 1) } qw(input button select textarea)
  },
};
311    
312 wakaba 1.130 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
313    
314 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
315     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
316    
317     ## -- Common attribute syntax checkers
318    
319 wakaba 1.1 our $AttrChecker;
320 wakaba 1.82 our $AttrStatus;
321 wakaba 1.1
## Build a checker for an enumerated attribute.
##
## Argument: $states - hash reference {keyword => state}, where the
## keyword is in lowercase and the state is a positive number for a
## conforming keyword or a negative number for a known but
## non-conforming one.
##
## Returns a code reference taking ($self, $attr).  It reports
## |enumerated:non-conforming| or |enumerated:invalid| through
## $self->{onerror} at the |must| level; conforming values are silent.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## ASCII case-insensitive match.  |lc| is not used, since it also
    ## maps non-ASCII characters (e.g. U+212A KELVIN SIGN to "k"),
    ## which would let a non-keyword value pass as a keyword.
    $value =~ tr/A-Z/a-z/;
    ## Unknown values are treated as state 0 (invalid).
    my $state = $states->{$value} || 0;
    if ($state > 0) {
      #
    } elsif ($state) {
      $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming',
                         level => $self->{level}->{must});
    } else {
      $self->{onerror}->(node => $attr, type => 'enumerated:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLEnumeratedAttrChecker
338    
## Build a checker for a boolean attribute.
##
## Argument: $local_name - the attribute's local name, which is
## compared with the ASCII-lowercased value.
##
## Returns a code reference taking ($self, $attr).  It reports
## |boolean:invalid| through $self->{onerror} at the |must| level
## unless the value is empty or matches $local_name.
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## ASCII case-insensitive match.  |lc| is not used, since it also
    ## maps non-ASCII characters (e.g. U+212A KELVIN SIGN to "k").
    $value =~ tr/A-Z/a-z/;
    unless ($value eq $local_name or $value eq '') {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLBooleanAttrChecker
350    
## Unordered set of unique space-separated tokens.
##
## Argument: $allowed_words - hash reference of permitted tokens, or
## undef to permit any token.
##
## Returns a code reference taking ($self, $attr) that reports
## |duplicate token| for a repeated token and |word not allowed| for
## the first occurrence of a token not in $allowed_words (both at the
## |must| level).
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my $allowed_words = shift;
  return sub {
    my ($self, $attr) = @_;
    my %seen;
    for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      ## Leading white space yields an empty first field.
      next unless length $token;

      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
        next;
      }
      $seen{$token} = 1;

      next if not defined $allowed_words;
      next if $allowed_words->{$token};
      $self->{onerror}->(node => $attr, type => 'word not allowed',
                         value => $token,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
377 wakaba 1.8
## |rel| attribute (set of space separated tokens, whose allowed
## values are defined by the section on link types).
##
## Arguments: $a_or_area (used as the index into each link type's
## |effect| array and as a boolean), $todo, $self, $attr, $item (not
## used here), $element_state.
##
## Side effects: reports errors through $self->{onerror}, records
## unique link types in $self->{has_link_type}, sets
## $todo->{has_hyperlink_link_type}, and fills in
## $element_state->{uri_info}->{href}->{type} and
## $element_state->{link_rel}.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Collect the distinct tokens.  A repeated |up| is not an error;
  ## any other repeated token is.
  my %word;
  for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    next unless length $token;
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
    }
  }
  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  my ($is_hyperlink, $is_resource);
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $token (keys %word) {
    my $def = $LinkType->{$token};
    unless (defined $def) {
      $self->{onerror}->(node => $attr,
                         type => 'unknown link type',
                         value => $token,
                         level => $self->{level}->{uncertain});
      next;
    }

    my $status = $def->{status};
    my $effect = $def->{effect}->[$a_or_area];

    if ($status eq 'proposal') {
      $self->{onerror}->(node => $attr,
                         type => 'link type:proposed',
                         value => $token,
                         level => $self->{level}->{should});
    }
    if ($status eq 'accepted' or $status eq 'proposal') {
      ## The link type has no effect defined for this kind of element.
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context',
                           value => $token,
                           level => $self->{level}->{must});
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming',
                         value => $token,
                         level => $self->{level}->{must});
    }

    ## Link types flagged |unique| may be used only once per $self.
    if ($def->{unique}) {
      if ($self->{has_link_type}->{$token}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate',
                           value => $token,
                           level => $self->{level}->{must});
      } else {
        $self->{has_link_type}->{$token} = 1;
      }
    }

    ## |alternate| is classified after the loop, since its kind
    ## depends on whether |stylesheet| is also specified.
    if (defined $effect and $token ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback link,
  ## which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
493 wakaba 1.20
494     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
495 wakaba 1.1
## URI (or IRI)
##
## Arguments: ($self, $attr, $item, $element_state); $item is not used
## here.
##
## Checks the attribute value with
## |Whatpm::URIChecker->check_iri_reference|, forwarding any error to
## $self->{onerror} with |node| set to the attribute.  It also sets
## $self->{has_uri_attr}, records the attribute node in
## $element_state->{uri_info}->{<attribute name>}, and appends that
## record to $self->{return}->{uri}->{<value>}.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $value = $attr->value;
  ## NOTE: The error level table must be inside the argument list (it
  ## used to be left outside the closing parenthesis, so that it was
  ## never passed to the checker).
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
512    
## A space separated list of one or more URIs (or IRIs)
##
## Arguments: ($self, $attr).  Each item is checked with
## |Whatpm::URIChecker->check_iri_reference| and recorded in
## $self->{return}->{uri}->{<item>} with a type chosen from the
## attribute name (|ping|, |profile|, or |archive|).
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  my %type_by_attr_name = (
    ping => 'action',
    profile => 'namespace',
    archive => 'resource',
  );
  my $type = $type_by_attr_name{$attr->name};

  my @uris = split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
  my $i = 0;
  for my $value (@uris) {
    my $report = sub {
      $self->{onerror}->(value => $value, @_, node => $attr, index => $i);
    };
    Whatpm::URIChecker->check_iri_reference ($value, $report, $self->{level});

    ## TODO: absolute
    my $record = {node => $attr, type => {$type => 1}};
    push @{$self->{return}->{uri}->{$value} ||= []}, $record;

    $i++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## ISSUE: What is "space"?
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
541    
## Pattern for an e-mail address of the form dot-atom "@" dot-atom.
## NOTE: The pattern is not anchored.
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/;
  qr/$dot_atom\@$dot_atom/;
};
548    
## Valid global date and time (and other date/time formats).
##
## Argument: $type - suffix of the |Message::Date| parsing method to
## call (|parse_| . $type).
##
## Returns a code reference taking ($self, $attr, $item,
## $element_state).  The result of parsing is stored in
## $element_state->{date_value}->{<attribute name>}.  If the parser
## returns a false value, the empty string is stored when an error
## other than |date value not supported| has been reported, and undef
## otherwise.
my $GetDateTimeAttrChecker = sub ($) {
  my $type = shift;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    require Message::Date;
    my $parser = Message::Date->new;
    $parser->{level} = $self->{level};

    my $fallback;
    $parser->{onerror} = sub {
      my %error = @_;
      unless ($error{type} eq 'date value not supported') {
        $self->{onerror}->(%error, node => $attr);
        $fallback = '';
      }
    };

    my $parse = 'parse_' . $type;
    my $parsed = $parser->$parse ($attr->value);
    $element_state->{date_value}->{$attr->name} = $parsed || $fallback;
  };
}; # $GetDateTimeAttrChecker
573 wakaba 1.1
## Integer: an optional "-" followed by one or more ASCII digits.
## Reports |integer:syntax error| at the |must| level otherwise.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  my $text = $attr->value;
  $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                     level => $self->{level}->{must})
      unless $text =~ /\A-?[0-9]+\z/;
}; # $HTMLIntegerAttrChecker
582    
## Build a checker for a non-negative integer attribute.
##
## Argument: $range_check - code reference called with the numeric
## value; a false result is reported as |nninteger:out of range|.
## Values that are not a sequence of ASCII digits are reported as
## |nninteger:syntax error|.  Both are at the |must| level.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $text = $attr->value;

    unless ($text =~ /\A[0-9]+\z/) {
      return $self->{onerror}->(node => $attr,
                                type => 'nninteger:syntax error',
                                level => $self->{level}->{must});
    }

    $self->{onerror}->(node => $attr, type => 'nninteger:out of range',
                       level => $self->{level}->{must})
        unless $range_check->($text + 0);
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
600    
## Build a checker for a floating point number attribute.
##
## Argument: $range_check - code reference called with the numeric
## value.
##
## Returns a code reference taking ($self, $attr, $item,
## $element_state).  A well-formed, in-range value is stored in
## $element_state->{number_value}->{<attribute name>}; otherwise
## |float:out of range| or |float:syntax error| is reported at the
## |must| level.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $text = $attr->value;

    my $well_formed = ($text =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                       $text =~ /\A-?\.[0-9]+\z/);
    unless ($well_formed) {
      return $self->{onerror}->(node => $attr,
                                type => 'float:syntax error',
                                level => $self->{level}->{must});
    }

    unless ($range_check->($text + 0)) {
      return $self->{onerror}->(node => $attr, type => 'float:out of range',
                                level => $self->{level}->{must});
    }

    ## TODO: parse algorithm
    $element_state->{number_value}->{$attr->name} = $text + 0;
  };

  ## TODO: scientific notation
}; # $GetHTMLFloatingPointNumberAttrChecker
624    
## |step| attribute.
## NOTE: A valid floating point number (> 0), or ASCII
## case-insensitive "any".
my $StepAttrChecker = sub {
  my ($self, $attr) = @_;
  my $text = $attr->value;

  my $is_number = ($text =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                   $text =~ /\A-?\.[0-9]+\z/);
  if ($is_number) {
    $self->{onerror}->(node => $attr, type => 'float:out of range',
                       level => $self->{level}->{must})
        unless $text > 0;
  } elsif (not $text =~ /\A[Aa][Nn][Yy]\z/) {
    $self->{onerror}->(node => $attr,
                       type => 'float:syntax error',
                       level => $self->{level}->{must});
  }

  ## TODO: scientific
}; # $StepAttrChecker
647    
## HTML4 %Length; - one or more ASCII digits, optionally followed by
## "%".  Reports |length:syntax error| at the |must| level otherwise.
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  my $text = $attr->value;
  $self->{onerror}->(node => $attr, type => 'length:syntax error',
                     level => $self->{level}->{must})
      unless $text =~ /\A[0-9]+%?\z/;

  ## NOTE: HTML4 definition is too vague - it does not define the syntax
  ## of percentage value at all (!).
}; # $HTMLLengthAttrChecker
660    
## One or more printable ASCII characters other than SPACE, |"|, |(|,
## |)|, |,|, |/|, |:|-|@|, |[|, |\|, and |]|.
my $MIMEToken = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;

## Type or subtype name, 1 to 127 characters (RFC 4288).
my $TypeOrSubtype = qr/[A-Za-z0-9!#\$&.+^_-]{1,127}/;

## Internet media type without parameters; captures the type and the
## subtype.
my $IMTNoParameter = qr{($TypeOrSubtype)/($TypeOrSubtype)};
664    
## "A valid MIME type, optionally with parameters. [RFC 2046]"
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
##
## Arguments: ($self, $attr).  A value that does not match the
## type/subtype;parameters syntax is reported as |IMT:syntax error| at
## the |must| level.  Otherwise the type, the subtype, and the
## parameter name/value pairs are passed to
## |Whatpm::IMTChecker->check_imt|.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;

  unless ($value =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#) {
    return $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                              level => $self->{level}->{must});
  }

  my @type = ($1, $2);
  my $rest = $3;

  ## Consume the parameters one by one from the front of the list.
  while ($rest =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
    my ($name, $token, $quoted) = ($1, $2, $3);
    if (defined $token) {
      push @type, $name => $token;
    } else {
      ## Quoted string: undo the backslash escapes, then drop the
      ## surrounding quotation marks.
      $quoted =~ s/\\(.)/$1/gs;
      push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
    }
  }

  require Whatpm::IMTChecker;
  my $ic = Whatpm::IMTChecker->new;
  $ic->{level} = $self->{level};
  $ic->check_imt (sub {
    $self->{onerror}->(@_, node => $attr);
  }, @type);
}; # $HTMLIMTAttrChecker
701    
## Language tag, checked with
## |Whatpm::LangTag->check_rfc3066_language_tag|.  Errors are forwarded
## to $self->{onerror} with |node| set to the attribute.
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  require Whatpm::LangTag;
  my $report = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  Whatpm::LangTag->check_rfc3066_language_tag
      ($attr->value, $report, $self->{level});
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
715    
## "A valid media query [MQ]"
## The value is not examined; every use is reported as |media query|
## at the |uncertain| level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my $report = $self->{onerror};
  $report->(node => $attr,
            type => 'media query',
            level => $self->{level}->{uncertain});
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
724    
## Event handler attribute.  The value is not examined; every use is
## reported as |event handler| at the |uncertain| level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my $report = $self->{onerror};
  $report->(node => $attr,
            type => 'event handler',
            level => $self->{level}->{uncertain});
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
736    
## |form| attribute.
## NOTE: MUST be the ID of a |form| element.
## This sub only queues ['form', value, attribute node] on
## $self->{idref}.
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;

  my $reference = ['form', $attr->value, $attr];
  push @{$self->{idref}}, $reference;

  ## ISSUE: <form id=""><input form=""> (empty ID)?
}; # $HTMLFormAttrChecker
747    
## |list| attribute.
## NOTE: MUST be the ID of a |datalist| element.
## This sub only queues ['datalist', value, attribute node] on
## $self->{idref}.
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;

  my $reference = ['datalist', $attr->value, $attr];
  push @{$self->{idref}}, $reference;

  ## TODO: Warn violation to control-dependent restrictions.  For
  ## example, |<input type=url maxlength=10 list=a> <datalist
  ## id=a><option value=nonurlandtoolong></datalist>| should be
  ## warned.
}; # $ListAttrChecker
760    
## |pattern| attribute.  The value is handed to $self->{onsubdoc} as a
## subdocument with the media type |text/x-regexp-js|.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;
  my %subdoc = (
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-regexp-js',
    is_char_string => 1,
  );
  $self->{onsubdoc}->(\%subdoc);

  ## ISSUE: "value must match the Pattern production of ECMA 262's
  ## grammar" - no additional constraints (e.g. {n,m} then n>=m).

  ## TODO: Warn if @value does not match @pattern.
}; # $PatternAttrChecker
773    
## Checker for the |accept| attribute: a comma-separated list of
## tokens, each of which is |audio/*|, |video/*|, |image/*|, or an
## Internet media type without parameters.  Duplicate tokens are
## reported.
my $AcceptAttrChecker = sub {
  my ($self, $attr) = @_;

  my $list = $attr->value;
  $list =~ tr/A-Z/a-z/; ## ASCII case-insensitive

  ## An empty attribute value is treated as a single empty token.
  my @tokens = ('');
  @tokens = split /,/, $list, -1 if length $list;

  my %seen;
  TOKEN: for my $token (@tokens) {
    if ($seen{$token}) {
      $self->{onerror}->(node => $attr,
                         type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
      next TOKEN;
    }
    $seen{$token} = 1;

    ## The three wildcard keywords need no further check.
    next TOKEN if $token eq 'audio/*';
    next TOKEN if $token eq 'video/*';
    next TOKEN if $token eq 'image/*';

    if ($token =~ m[\A$IMTNoParameter\z]) {
      ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
      ## define its own syntax citing RFC 4288.

      ## NOTE: Parameters not allowed.
      require Whatpm::IMTChecker;
      my $imt_checker = Whatpm::IMTChecker->new;
      $imt_checker->{level} = $self->{level};
      my $forward_error = sub {
        $self->{onerror}->(@_, node => $attr);
      };
      ## |$1| and |$2| are the groups captured by |$IMTNoParameter|.
      $imt_checker->check_imt ($forward_error, $1, $2);
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'IMTnp:syntax error', ## TODOC: type
                         value => $token,
                         level => $self->{level}->{must});
    }
  } # TOKEN
}; # $AcceptAttrChecker
812    
## Checker for the |name| attribute of form controls: the value must
## not be empty.
my $FormControlNameAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: No uniqueness constraint.
  return if length $attr->value;

  $self->{onerror}->(node => $attr,
                     type => 'empty control name', ## TODOC: type
                     level => $self->{level}->{must});
}; # $FormControlNameAttrChecker
824    
## Checker for the |autofocus| attribute: a boolean attribute for
## which a second occurrence seen by the same checker object is
## reported as a duplicate.
my $AutofocusAttrChecker = sub {
  my ($self, $attr) = @_;

  ## Syntax check as a boolean attribute first.
  my $check_boolean = $GetHTMLBooleanAttrChecker->('autofocus');
  $check_boolean->(@_);

  ## |$self->{has_autofocus}| remembers that an |autofocus| attribute
  ## has already been checked.
  $self->{onerror}->(node => $attr,
                     type => 'duplicate autofocus', ## TODOC: type
                     level => $self->{level}->{must})
      if $self->{has_autofocus};
  $self->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
837    
## Checker for the |usemap| attribute.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;

  ## MUST be a valid hash-name reference to a |map| element.
  my $reference = $attr->value;

  if (substr ($reference, 0, 1) eq '#') {
    ## NOTE: |usemap="#"| is conforming, though it identifies no |map|
    ## element according to the "rules for parsing a hash-name
    ## reference" algorithm.  The document is non-conforming anyway,
    ## since |<map name="">| (empty name) is non-conforming.
    my $map_name = substr ($reference, 1);
    push @{$self->{usemap}}, [$map_name => $attr];
  } else {
    $self->{onerror}->(node => $attr, type => 'hashref:syntax error',
                       level => $self->{level}->{must});
  }

  ## NOTE: Space characters in hash-name references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported
}; # $HTMLUsemapAttrChecker
855    
856 wakaba 1.76 ## Valid browsing context name
## A browsing context name must be non-empty and must not begin with
## an underscore.
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($self, $attr) = @_;
  my $name = $attr->value;

  if ($name eq '') {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  } elsif (substr ($name, 0, 1) eq '_') {
    ## Names beginning with "_" are reserved.
    $self->{onerror}->(node => $attr, type => 'window name:reserved',
                       level => $self->{level}->{must},
                       value => $name);
  }
}; # $HTMLBrowsingContextNameAttrChecker
871    
872     ## Valid browsing context name or keyword
## Checker for |target|-like attributes, whose value must be a valid
## browsing context name or one of the keywords |_blank|, |_self|,
## |_parent|, and |_top|.
##
## Values beginning with "_" other than the four keywords are reported
## as 'window name:reserved'; an empty value is reported as
## 'window name:empty'.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    ## Keywords are matched ASCII case-insensitively.  |tr| is used
    ## rather than |lc|, since |lc| also maps non-ASCII characters
    ## (e.g. U+212A KELVIN SIGN to "k") and would accept values that
    ## are not ASCII case-insensitive matches for the keywords.
    $value =~ tr/A-Z/a-z/;
    unless ({
      _blank => 1, _self => 1, _parent => 1, _top => 1,
    }->{$value}) {
      $self->{onerror}->(node => $attr,
                         type => 'window name:reserved',
                         level => $self->{level}->{must},
                         value => $value);
    }
  } elsif (length $value) {
    ## Any other non-empty string is a valid browsing context name.
  } else {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  }
}; # $HTMLTargetAttrChecker
893    
## Checker for attributes whose value is a group of selectors.  The
## value is parsed by |Whatpm::CSS::SelectorsParser|; errors raised by
## the parser are forwarded to |onerror| with the attribute as the
## node.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  ## Pseudo-classes and pseudo-elements enabled in the parser.
  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  my @pseudo_elements = qw/
    after before first-letter first-line
  /;
  @{$parser->{pseudo_class}}{@pseudo_classes} = (1) x @pseudo_classes;
  @{$parser->{pseudo_element}}{@pseudo_elements} = (1) x @pseudo_elements;

  $parser->{level} = $self->{level};
  $parser->{onerror} = sub {
    $self->{onerror}->(@_, node => $attr);
  };

  my $selectors = $attr->value;
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
921    
## Checks a single charset name.
##
## Arguments: the charset name, the checker object, the attribute node
## errors are reported against, and an optional flag requesting that
## the charset be an ASCII-compatible encoding.
##
## Returns the list |($charset, $charset_value)|: the entry found in
## |$Message::Charset::Info::IANACharset| (false if there is none) and
## the charset name converted to ASCII lowercase.
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($charset_value, $self, $attr, $ascii_compat) = @_;

  ## NOTE: This code is used for |charset=""| attributes, |charset=|
  ## portion of the |content=""| attributes, and |accept-charset=""|
  ## attributes.

  ## NOTE: Though the case-sensitivity of |charset| attribute value
  ## is not explicitly spelled in the HTML5 spec, the Character Set
  ## registry of IANA, which is referenced from HTML5 spec, says that
  ## charset name is case-insensitive.
  $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

  ## Reports an error of the given type at the given level name.
  my $report = sub {
    my ($type, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $charset_value,
                       level => $self->{level}->{$level_name});
  };

  require Message::Charset::Info;
  my $charset = $Message::Charset::Info::IANACharset->{$charset_value};

  ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
  ## Syntactically valid and registered?  What about x-charset names?
  $report->('charset:syntax error', 'must')
      unless Message::Charset::Info::is_syntactically_valid_iana_charset_name
          ($charset_value);

  ## Error type used whenever the name is not a registered one.
  my $unregistered_type = $charset_value =~ /^x-/
      ? 'charset:private' : 'charset:not registered';

  unless ($charset) {
    ## TODO: non-preferred-name error for this case.
    $report->($unregistered_type, 'good');

    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    return ($charset, $charset_value);
  }

  ## ISSUE: What is "the preferred name for that encoding" (for a charset
  ## with no "preferred MIME name" label)?
  my $name_status = $charset->{iana_names}->{$charset_value} || 0;

  my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
  $report->('charset:not preferred', 'must')
      unless ($name_status & $preferred) == $preferred;

  my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();
  $report->($unregistered_type, 'good')
      unless ($name_status & $registered) == $registered;

  if ($ascii_compat) {
    my $is_ascii_compat = $charset->{category} &
        Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ();
    $report->('charset:not ascii compat', 'must') unless $is_ascii_compat;
  }

  return ($charset, $charset_value);
}; # $HTMLCharsetChecker
1010    
1011 wakaba 1.129 ## NOTE: "An ordered set of space-separated tokens" where "each token
1012     ## MUST be the preferred name of an ASCII-compatible character
1013     ## encoding".
## Checker for attributes whose value is a space-separated list of
## charset names.  Each token is checked by |$HTMLCharsetChecker| with
## the ASCII-compatibility requirement turned on.
my $HTMLCharsetsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: "ordered set of space-separated tokens" is not defined.

  ## XXX
  ## ISSUE: Uniqueness is not enforced.

  ## ISSUE: Shift_JIS is ASCII-compatible?  What about ISO-2022-JP?

  TOKEN: for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    ## |split| yields a leading empty field when the value begins
    ## with a space character.
    next TOKEN unless length $token;
    $HTMLCharsetChecker->($token, $self, $attr, 1);
  }
}; # $HTMLCharsetsAttrChecker
1030    
## Checker for HTML4 color attributes: the value must be "#" followed
## by hexadecimal digits or one of the sixteen HTML4 color names,
## matched case-insensitively.
my $HTMLColorAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: HTML4 "color" or |%Color;|

  ## TODO: HTML4 has some guideline on usage of color.

  my $color = $attr->value;
  return if $color =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i;

  $self->{onerror}->(node => $attr, type => 'color:syntax error',
                     level => $self->{level}->{html4_fact});
}; # $HTMLColorAttrChecker
1045    
## Checker shared by the |ref| and |template| attributes.
##
## The value is checked as a URI first.  A |ref| attribute is only
## allowed together with a |template| attribute.  When the value
## refers to the document itself, a fragment identifier is recorded in
## |$self->{ref}| or |$self->{template}|; a |template| reference
## without a fragment requires the root element to be an HTML
## |datatemplate| element.
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($self, $attr) = @_;
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $attr->name;

  if ($attr_name eq 'ref' and
      not $attr->owner_element->has_attribute_ns (undef, 'template')) {
    $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                       level => $self->{level}->{must});
  }

  require Message::URL;
  my $doc = $attr->owner_document;
  my $doc_uri = $doc->document_uri;
  my $uri = Message::URL->new_abs ($attr->value, $doc_uri);
  my $no_frag_uri = $uri->clone;
  $no_frag_uri->uri_fragment (undef);

  ## Does the URL, apart from its fragment, identify this document?
  my $same_document = defined $doc_uri
      ? $doc_uri eq $no_frag_uri
      : $no_frag_uri eq '';

  unless ($same_document) {
    ## TODO: An external document is referenced.
    ## The document MUST be an HTML or XML document.
    ## If there is a fragment identifier, it MUST point a part of the doc.
    ## If the attribute is |template|, the pointed part MUST be a
    ## |datatemplate| element.
    ## If no fragment identifier is specified, the root element MUST be
    ## a |datatemplate| element when the attribute is |template|.
    return;
  }

  my $fragid = $uri->uri_fragment;
  if (defined $fragid) {
    push @{$self->{$attr_name}}, [$fragid => $attr];
    return;
  }

  ## No fragment: only |template| imposes a further requirement.
  return unless $attr_name eq 'template';

  my $docel = $doc->document_element;
  if ($docel) {
    my $nsuri = $docel->namespace_uri;
    return if defined $nsuri and $nsuri eq $HTML_NS and
        $docel->manakai_local_name eq 'datatemplate';
  }

  $self->{onerror}->(node => $attr, type => 'template:not template',
                     level => $self->{level}->{must});
}; # $HTMLRefOrTemplateAttrChecker
1098    
## Checker for the |repeat-min|, |repeat-max|, and |repeat-start|
## attributes.
##
## A namespaced form of the attribute is not allowed on an element
## that is itself in the HTML namespace.  The value is then checked as
## a non-negative integer.
my $HTMLRepeatIndexAttrChecker = sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## Both conditions must hold.  With |or|, every namespaced owner
    ## element was reported, and an owner element in no namespace made
    ## |eq| compare an undefined value.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  ## Any non-negative integer is acceptable (the range check always
  ## returns true).
  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1113    
## Checkers for the global content attributes of HTML elements, keyed
## by attribute local name.  |$GetHTMLAttrsChecker| invokes a checker
## as |$checker->($self, $attr, $item, $element_state)|; problems are
## reported through |$self->{onerror}|.
my $HTMLAttrChecker = {
  accesskey => sub {
    my ($self, $attr) = @_;

    ## "Ordered set of unique space-separated tokens"

    my %keys;
    my @keys = grep {length} split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

    for my $key (@keys) {
      unless ($keys{$key}) {
        $keys{$key} = 1;
        ## Each token must be exactly one character long.
        if ((length $key) != 1) {
          $self->{onerror}->(node => $attr, type => 'char:syntax error',
                             value => $key,
                             level => $self->{level}->{must});
        }
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $key,
                           level => $self->{level}->{must});
      }
    }
  }, # accesskey

  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]
  id => sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    if ((length $value) > 0) {
      if ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID',
                           level => $self->{level}->{must});
        push @{$self->{id}->{$value}}, $attr;
      } else {
        ## First occurrence: remember the attribute and the ID type
        ## supplied by the element state.
        $self->{id}->{$value} = [$attr];
        $self->{id_type}->{$value} = $element_state->{id_type} || '';
      }
      if ($value =~ /[\x09\x0A\x0C\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID',
                           level => $self->{level}->{must});
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value',
                         level => $self->{level}->{must});
    }
  },
  title => sub {}, ## NOTE: No conformance criteria
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value eq '') {
      ## The empty string is accepted without further checks.
    } else {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        $self->{onerror}->(@_, node => $attr);
      }, $self->{level});
    }
    ## ISSUE: RFC 4646 (3066bis)?

    ## TODO: test data

    ## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
    ## non-conforming.  Such errors are detected by the checkers of
    ## |{}xml:lang| and |{xml}:lang| attributes.
  },
  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
  class => sub {
    my ($self, $attr) = @_;

    ## NOTE: "Unordered set of unique space-separated tokens".

    my %word;
    for my $word (grep {length $_}
                  split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      unless ($word{$word}) {
        $word{$word} = 1;
        ## Collect the attribute under its class name in the result.
        push @{$self->{return}->{class}->{$word}||=[]}, $attr;
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $word,
                           level => $self->{level}->{must});
      }
    }
  },
  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),
  contextmenu => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    push @{$self->{idref}}, ['menu', $value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },
  hidden => $GetHTMLBooleanAttrChecker->('hidden'),
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ref => $HTMLRefOrTemplateAttrChecker,
  registrationmark => sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## NOTE: Any value is conforming.

    if ($self->{flag}->{in_rule}) {
      my $el = $attr->owner_element;
      my $ln = $el->manakai_local_name;
      if ($ln eq 'nest' or
          ($ln eq 'rule' and not $element_state->{in_rule_original})) {
        my $nsuri = $el->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    } else {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  },
  repeat => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## A namespaced |repeat| attribute is not allowed on an element
      ## in the HTML namespace.  Both conditions must hold; |or| here
      ## reported every namespaced owner element and compared an
      ## undefined value otherwise.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    my $value = $attr->value;
    if ($value eq 'template') {
      ## The keyword |template| is valid.
    } elsif ($value =~ /\A-?[0-9]+\z/) {
      ## An integer, optionally negative, is valid.
    } else {
      $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "Repetition templates may occur anywhere."  Does that mean
    ## that the attribute MAY be specified to any element, or that the
    ## element with that attribute (i.e. a repetition template) can be
    ## inserted anywhere in a document tree?
  },
  'repeat-min' => $HTMLRepeatIndexAttrChecker,
  'repeat-max' => $HTMLRepeatIndexAttrChecker,
  'repeat-start' => $HTMLRepeatIndexAttrChecker,
  'repeat-template' => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## Same namespace rule as for |repeat|: both conditions must
      ## hold.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## ISSUE: This attribute has no conformance requirement.
    ## ISSUE: Repetition blocks MAY have this attribute.  Then, is the
    ## attribute allowed on an element that is not a repetition block?
  },
  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
  style => sub {
    my ($self, $attr) = @_;

    ## The value is handed to |onsubdoc| as an inline style sheet.
    $self->{onsubdoc}->({s => $attr->value,
                         container_node => $attr,
                         media_type => 'text/x-css-inline',
                         is_char_string => 1});

    ## NOTE: "... MUST still be comprehensible and usable if those
    ## attributes were removed" is a semantic requirement, it cannot
    ## be tested.
  },
  tabindex => $HTMLIntegerAttrChecker,
  template => $HTMLRefOrTemplateAttrChecker,
  'xml:lang' => sub {
    my ($self, $attr) = @_;

    if ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(type => 'in HTML:xml:lang',
                         level => $self->{level}->{info},
                         node => $attr);
      ## NOTE: This is not an error, but the attribute will be ignored.
    } else {
      $self->{onerror}->(type => 'in XML:xml:lang',
                         level => $self->{level}->{html5_no_may},
                         node => $attr);
      ## TODO: We need to add test for this error.
    }

    ## The attribute must be accompanied by a |lang| attribute with
    ## the same value, compared ASCII case-insensitively.
    my $lang_attr = $attr->owner_element->get_attribute_node_ns
        (undef, 'lang');
    if ($lang_attr) {
      my $lang_attr_value = $lang_attr->value;
      $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      if ($lang_attr_value ne $value) {
        $self->{onerror}->(type => 'xml:lang ne lang',
                           level => $self->{level}->{must},
                           node => $attr);
      }
    } else {
      $self->{onerror}->(type => 'xml:lang not allowed',
                         level => $self->{level}->{must},
                         node => $attr);
      ## TODO: We need to add test for <x {xml}:lang {}xml:lang>.
    }
  },
  xmlns => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                         level => $self->{level}->{must});
      ## TODO: Should be new "bad namespace" error?
    }
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                         level => $self->{level}->{must});
      ## TODO: Test
    }

    ## TODO: Should be resolved?
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {namespace => 1}};
  },
};
1351    
1352 wakaba 1.79 ## ISSUE: Shouldn't the same-origin policy applied to the datatemplate feature?
1353    
## Feature status of the global attributes of HTML elements, keyed by
## attribute local name.
my %HTMLAttrStatus = (
  accesskey         => FEATURE_HTML5_FD,
  class             => FEATURE_HTML5_WD,
  contenteditable   => FEATURE_HTML5_DEFAULT,
  contextmenu       => FEATURE_HTML5_WD,
  dir               => FEATURE_HTML5_WD,
  draggable         => FEATURE_HTML5_LC,
  hidden            => FEATURE_HTML5_DEFAULT,
  id                => FEATURE_HTML5_WD,
  irrelevant        => FEATURE_HTML5_DROPPED,
  lang              => FEATURE_HTML5_WD,
  ref               => FEATURE_HTML5_AT_RISK,
  registrationmark  => FEATURE_HTML5_AT_RISK,
  repeat            => FEATURE_WF2,
  'repeat-max'      => FEATURE_WF2,
  'repeat-min'      => FEATURE_WF2,
  'repeat-start'    => FEATURE_WF2,
  'repeat-template' => FEATURE_WF2,
  role              => 0,
  style             => FEATURE_HTML5_WD,
  tabindex          => FEATURE_HTML5_DEFAULT,
  template          => FEATURE_HTML5_AT_RISK,
  title             => FEATURE_HTML5_WD,
  xmlns             => FEATURE_HTML5_WD,
);
1379    
## Feature status of attribute names combining HTML5, XHTML
## Modularization 1.0, and RDFa status flags, keyed by attribute
## local name and grouped below by status value.
my %HTMLM12NCommonAttrStatus = (
  ## RDFa attributes
  (map { ($_ => FEATURE_RDFA_REC) } qw/
    about content datatype href property rel resource rev typeof
  /),
  instanceof => FEATURE_RDFA_LC_DROPPED,

  ## Core and I18N attributes
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_M12N10_REC) } qw/
    class dir id title
  /),

  ## Intrinsic event attributes
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
    onclick ondblclick onmousedown onmouseup onmouseover onmousemove
    onmouseout onkeypress onkeydown onkeyup
  /),

  ## Style attribute
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_M12N10_REC,
  style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
      FEATURE_M12N10_REC,
);
1410    
## Feature status of attribute names carrying the XHTML 2.0 status
## flag, keyed by attribute local name.  Attributes that also carry
## the HTML5 status flag are listed first.
my %XHTML2CommonAttrStatus = (
  ## Also part of HTML5: Core (class, id, title), Bi-directional
  ## (dir), and Style (style; "strongly discouraged").
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_XHTML2_ED) } qw/
    class id title dir style
  /),

  ## XHTML2 only
  (map { ($_ => FEATURE_XHTML2_ED) } (
    ## Core (|xml:id| is not listed)
    qw/layout/,

    ## Hypertext (|xml:base| is not listed)
    qw/cite href hreflang hrefmedia hreftype nextfocus prevfocus target/,

    ## I18N: |xml:lang| is not listed.

    ## Edit
    qw/edit datetime/,

    ## Embedding
    qw/encoding src srctype/,

    ## Image Map
    qw/usemap ismap shape coords/,

    ## Media
    qw/media/,

    ## Metadata
    qw/about content datatype instanceof property rel resource rev/,

    ## Role
    qw/role/,
  )),
);
1470    
## Union of |%HTMLM12NCommonAttrStatus| and |%XHTML2CommonAttrStatus|.
## The explicit entries come last in the list, so they replace the
## values copied from the two hashes for the same attribute names.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  about      => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  class      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  content    => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  datatype   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  dir        => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  href       => FEATURE_RDFA_REC,
  id         => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  instanceof => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,
  property   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  rel        => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  resource   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  rev        => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  #style     => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  style      => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
      FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  title      => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  typeof     => FEATURE_RDFA_REC,
);
1494    
## Event handler content attributes with the default HTML5 status.
for my $handler_name (qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onstorage onsubmit onunload
/) {
  $HTMLAttrChecker->{$handler_name} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$handler_name} = FEATURE_HTML5_DEFAULT;
}

## Event handler content attributes dropped from HTML5.
for my $handler_name (qw/
  ondataunavailable
/) {
  $HTMLAttrChecker->{$handler_name} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$handler_name} = FEATURE_HTML5_DROPPED;
}

## NOTE: Non-standard global attributes in the HTML namespace.
$AttrChecker->{$HTML_NS}->{''} = sub {}; # no syntactical checks
$AttrStatus->{$HTML_NS}->{''} = 0; # disallowed and not part of any standard

## Status (and, for the repetition attributes, checkers) of attributes
## in the HTML namespace.
$AttrStatus->{$HTML_NS}->{active} = FEATURE_HTML5_DROPPED;
for my $repeat_name (qw/repeat repeat-max repeat-min repeat-start
                        repeat-template/) {
  $AttrChecker->{$HTML_NS}->{$repeat_name} = $HTMLAttrChecker->{$repeat_name};
  $AttrStatus->{$HTML_NS}->{$repeat_name} = FEATURE_WF2;
}

for my $rdfa_name (qw/about content datatype property rel resource rev/) {
  $AttrStatus->{$HTML_NS}->{$rdfa_name}
      = FEATURE_RDFA_REC | FEATURE_XHTML2_ED;
}
$AttrStatus->{$HTML_NS}->{instanceof}
    = FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED;
$AttrStatus->{$HTML_NS}->{typeof} = FEATURE_RDFA_REC;
$AttrStatus->{$HTML_NS}->{role} = FEATURE_ROLE_LC;

for my $xhtml2_name (qw/
  cite coords datetime edit encoding href hreflang hrefmedia hreftype
  ismap layout media nextfocus prevfocus shape src srctype style
  target usemap
/) {
  $AttrStatus->{$HTML_NS}->{$xhtml2_name} = FEATURE_XHTML2_ED;
}

for my $core_name (qw/class dir id title/) {
  $AttrStatus->{$HTML_NS}->{$core_name}
      = FEATURE_M12N11_LC | FEATURE_XHTML2_ED;
}

for my $event_name (qw/
  onclick ondblclick onmousedown onmouseup onmouseover onmousemove
  onmouseout onkeypress onkeydown onkeyup
/) {
  $AttrStatus->{$HTML_NS}->{$event_name} = FEATURE_M12N11_LC;
}
1542    
## Checker for |data-*| attributes.  It performs no check: the only
## requirement is a semantic one.
my $HTMLDatasetAttrChecker = sub {
  ## NOTE: "Authors should ... when the attributes are ignored and
  ## any associated CSS dropped, the page is still usable."  (semantic
  ## constraint.)
}; # $HTMLDatasetAttrChecker

## Feature status shared by all |data-*| attributes.
my $HTMLDatasetAttrStatus = FEATURE_HTML5_WD;
1550 wakaba 1.73
## Builds the attribute checker of an HTML element type.
##
## Arguments: a hash reference mapping attribute local names to
## element-specific checkers, and a hash reference mapping attribute
## local names to their feature status.
##
## Returns a code reference, invoked as |($self, $item,
## $element_state)|, that checks every attribute of
## |$item->{node}| and reports its status through
## |$self->_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;

  return sub {
    my ($self, $item, $element_state) = @_;

    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' if not defined $ns;
      my $ln = $attr->manakai_local_name;

      my ($checker, $status);
      if ($ns eq '') {
        ## |data-*| attributes must not contain uppercase ASCII
        ## letters.
        my $is_dataset_attr = ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                               $ln !~ /[A-Z]/);
        if ($is_dataset_attr) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          ## Element-specific definitions take precedence over the
          ## global ones.
          $checker = $element_specific_checker->{$ln}
              || $HTMLAttrChecker->{$ln};
          $status = $element_specific_status->{$ln};
        }
      }

      ## Fall back to the per-namespace tables, then to their
      ## wildcard (|''|) entries.
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($ns ne '' or $element_specific_status->{$ln}) {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }
      ## Otherwise (null-namespace attribute with no checker and no
      ## element-specific status) nothing is reported here.

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1592    
## Base checker shared by the HTML element checkers in this file.  It
## starts from |%Whatpm::ContentChecker::AnyChecker| and overrides
## |check_start| and |check_attrs|.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Flag the |template| and |ref| entries of |uri_info| with the
    ## |resource| type.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  ## No element-specific attribute checkers; statuses come from
  ## |%HTMLAttrStatus|.
  check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
);
1603    
## Checker for elements with an empty content model: any child element
## that is not listed in |plus_elements| and any significant text is
## reported at the |must| level.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:empty';
    }
    return unless defined $error_type;

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must});
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{level}->{must});
  },
);
1631    
## Checker for elements whose content model is text only: child elements
## are reported at the |must| level unless they are listed in
## |plus_elements|.  Text handling is inherited from |%HTMLChecker|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:text';
    }
    return unless defined $error_type;

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must});
  },
);
1650    
## Checker for elements whose content model is flow content.
##
## |$element_state->{has_non_style}| records that something other than
## an accepted |style| child has been seen; a later |style| child, or
## one without the |scoped| attribute, is reported.
my %HTMLFlowContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $must = sub {
      ## Report |$_[0]| as a |must|-level error on the child element.
      $self->{onerror}->(node => $child_el,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $must->('element not allowed:minus');
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed in this context.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## |style| is accepted only with |scoped| and only before any
      ## non-|style| content.
      if ($element_state->{has_non_style} or
          not $child_el->has_attribute_ns (undef, 'scoped')) {
        $must->('element not allowed:flow style');
      }
    } elsif ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
    } else {
      $element_state->{has_non_style} = 1;
      $must->('element not allowed:flow');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: A modified copy of the code below is in |datagrid| checker.
    if ($element_state->{has_significant}) {
      ## Propagate the "has significant content" flag to the parent state.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
);
1699    
## Checker for elements whose content model is phrasing content.  The
## |check_end| callback is the one of |%HTMLFlowContentChecker|.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed in this context.
    } elsif (not $HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:phrasing';
    }
    return unless defined $error_type;

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must});
  },
  check_end => $HTMLFlowContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between flow and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
1725    
## The transparent checker is a copy of the flow content checker.
my %HTMLTransparentChecker = (%HTMLFlowContentChecker);
## ISSUE: Significant content rule should be applied to transparent element
## with parent?

## Package variables holding the element definitions.
our $Element;
our $ElementDefault;

## Definition used for HTML-namespace elements that have no entry of
## their own (empty local name): a plain copy of |%HTMLChecker|.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
1736    
## Definition of the |html| element (root element).
##
## |$element_state->{phase}| tracks the position within the children:
## 'before head' -> 'after head' -> 'after body'.  The only accepted
## child elements are one |head| followed by one |body|.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    version => sub {
      ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
      ## Though DTDs of various versions of HTML define the attribute
      ## as |#FIXED|, this conformance checker does no check for
      ## the attribute value, since what kind of check should be done
      ## is unknown.
    },
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    manifest => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';

    ## Flag the |manifest|, |template| and |ref| entries of |uri_info|
    ## with the |resource| type.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{manifest}->{type}->{resource} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $must = sub {
      ## Report a |must|-level error on the child; |@_| supplies |type|
      ## (and optionally |text|).
      $self->{onerror}->(node => $child_el, @_,
                         level => $self->{level}->{must});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $must->(type => 'element not allowed:minus');
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed in this context.
    } elsif ($element_state->{phase} eq 'before head') {
      if ($child_nsuri ne $HTML_NS) {
        $must->(type => 'element not allowed');
      } elsif ($child_ln eq 'head') {
        $element_state->{phase} = 'after head';
      } elsif ($child_ln eq 'body') {
        ## |body| without a preceding |head|.
        $must->(type => 'ps element missing', text => 'head');
        $element_state->{phase} = 'after body';
      } else {
        $must->(type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after head') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
        $element_state->{phase} = 'after body';
      } else {
        $must->(type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after body') {
      $must->(type => 'element not allowed');
    } else {
      die "check_child_element: Bad |html| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must});
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Work out which required children were never seen.
    my @missing_children;
    if ($element_state->{phase} eq 'after body') {
      ## Both |head| and |body| have been seen.
    } elsif ($element_state->{phase} eq 'before head') {
      @missing_children = qw/head body/;
    } elsif ($element_state->{phase} eq 'after head') {
      @missing_children = qw/body/;
    } else {
      die "check_end: Bad |html| phase: $element_state->{phase}";
    }
    for my $child_name (@missing_children) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => $child_name,
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1841 wakaba 1.25
## Definition of the |head| element.
##
## Child elements must be metadata content, with at most one |title|
## and no |style| carrying the |scoped| attribute; exactly one |title|
## is required.  While the element is being processed,
## |$self->{flag}->{in_head}| is set; |check_end| restores the value the
## flag had before this |head| element.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is accepted.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## |style scoped| is not allowed in |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{level}->{must});
    }

    ## Remember the original value of the |in_head| flag only once per
    ## |head| element.  This callback runs for every child element; if
    ## the flag were saved each time, the second and later calls would
    ## save the value 1 set by the first call, and |check_end| would then
    ## leave the flag set after the |head| element has ended.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'title',
                         level => $self->{level}->{must});
    }
    ## Restore the flag saved by |check_child_element| (undefined when
    ## the element had no child element).
    $self->{flag}->{in_head} = $element_state->{in_head_original};

    $HTMLChecker{check_end}->(@_);
  },
};
1916    
## Definition of the |title| element: a text-only element (see
## |%HTMLTextChecker|) with no element-specific attribute checkers.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
      dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
1930 wakaba 1.1
## Definition of the |base| element (empty content model).
##
## |check_attrs| reports a second |base| element, a |base href| that
## comes after |$self->{has_uri_attr}| has been set, a |base target|
## that comes after |$self->{has_hyperlink_element}| has been set, and a
## |base| with neither |href| nor |target|.  It then delegates the
## per-attribute checks to a checker built by |$GetHTMLAttrsChecker|.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};
    my $must = sub {
      ## Report |$_[0]| as a |must|-level error on the |base| element.
      $self->{onerror}->(node => $base_el,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    ## Only one |base| element is allowed.
    if ($self->{has_base}) {
      $must->('element not allowed:base');
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $must->('basehref after URL attribute');
    }
    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $must->('basetarget after hyperlink');
    }

    ## At least one of |href| and |target| is required.
    unless ($has_href or $has_target) {
      $must->('attribute missing:href|target');
    }

    $element_state->{uri_info}->{href}->{type}->{base} = 1;

    my $check_base_attrs = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    });
    return $check_base_attrs->($self, $item, $element_state);
  },
};
1995    
## Definition of the |link| element (empty content model).
##
## |check_attrs| first runs the per-attribute checkers, then applies the
## element-level rules: |href| and |rel| are required, |sizes| is only
## allowed when |$element_state->{link_rel}->{icon}| is set, and an
## alternate style sheet needs a non-empty |title|.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $link_el = $item->{node};
    my $sizes_attr; # set by the |sizes| checker when the attribute exists

    my $check_link_attrs = $GetHTMLAttrsChecker->({
      charset => sub {
        ## Pass the attribute value followed by the original arguments.
        $HTMLCharsetChecker->($_[1]->value, @_);
      },
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      sizes => sub {
        my ($self, $attr) = @_;
        $sizes_attr = $attr;

        ## Each space-separated token must be unique and be either
        ## |any| or of the form WIDTHxHEIGHT.
        my %seen;
        for my $token (grep {length $_}
                       split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
          if ($seen{$token}) {
            $self->{onerror}->(node => $attr, type => 'duplicate token',
                               value => $token,
                               level => $self->{level}->{must});
            next;
          }
          $seen{$token} = 1;
          next if $token eq 'any'
              or $token =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/;
          $self->{onerror}->(node => $attr,
                             type => 'sizes:syntax error',
                             value => $token,
                             level => $self->{level}->{must});
        }
      },
      target => $HTMLTargetAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
      ## NOTE: |charset| attribute had been part of HTML5 spec though
      ## it had been commented out.
      href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      methods => FEATURE_HTML20_RFC,
      rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      sizes => FEATURE_HTML5_LC,
      target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      # title: HTML5_WD | HTML5_LC | ...
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    });
    $check_link_attrs->($self, $item, $element_state);

    ## |href| is required; a link with a hyperlink link type counts as
    ## a hyperlink element.
    if ($link_el->has_attribute_ns (undef, 'href')) {
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    } else {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => 'href',
                         level => $self->{level}->{must});
    }

    ## |rel| is required.
    unless ($link_el->has_attribute_ns (undef, 'rel')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => 'rel',
                         level => $self->{level}->{must});
    }

    ## |sizes| is reported unless the |icon| entry of |link_rel| is set.
    if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
      $self->{onerror}->(node => $sizes_attr,
                         type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }

    ## An alternate style sheet must have a non-empty |title|.
    if ($element_state->{link_rel}->{alternate} and
        $element_state->{link_rel}->{stylesheet}) {
      my $title_attr = $link_el->get_attribute_node_ns (undef, 'title');
      if (not $title_attr) {
        $self->{onerror}->(node => $link_el,
                           type => 'attribute missing',
                           text => 'title',
                           level => $self->{level}->{must});
      } elsif ($title_attr->value eq '') {
        $self->{onerror}->(node => $title_attr,
                           type => 'empty style sheet title',
                           level => $self->{level}->{must});
      }
    }
  },
};
2100    
2101     $Element->{$HTML_NS}->{meta} = {
2102 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2103 wakaba 1.40 %HTMLEmptyChecker,
2104     check_attrs => sub {
2105     my ($self, $item, $element_state) = @_;
2106 wakaba 1.1 my $name_attr;
2107     my $http_equiv_attr;
2108     my $charset_attr;
2109     my $content_attr;
2110 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2111 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2112     $attr_ns = '' unless defined $attr_ns;
2113     my $attr_ln = $attr->manakai_local_name;
2114     my $checker;
2115 wakaba 1.73 my $status;
2116 wakaba 1.1 if ($attr_ns eq '') {
2117 wakaba 1.73 $status = {
2118     %HTMLAttrStatus,
2119 wakaba 1.82 %XHTML2CommonAttrStatus,
2120 wakaba 1.153 charset => FEATURE_HTML5_WD,
2121     content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2122     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2123     'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2124     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
2125     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2126     name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2127 wakaba 1.73 scheme => FEATURE_M12N10_REC,
2128     }->{$attr_ln};
2129    
2130 wakaba 1.1 if ($attr_ln eq 'content') {
2131     $content_attr = $attr;
2132     $checker = 1;
2133     } elsif ($attr_ln eq 'name') {
2134     $name_attr = $attr;
2135     $checker = 1;
2136     } elsif ($attr_ln eq 'http-equiv') {
2137     $http_equiv_attr = $attr;
2138     $checker = 1;
2139     } elsif ($attr_ln eq 'charset') {
2140     $charset_attr = $attr;
2141     $checker = 1;
2142 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
2143 wakaba 1.71 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2144 wakaba 1.67 $checker = sub {};
2145 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2146     $attr_ln !~ /[A-Z]/) {
2147 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
2148     $status = $HTMLDatasetAttrStatus;
2149 wakaba 1.1 } else {
2150     $checker = $HTMLAttrChecker->{$attr_ln}
2151 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
2152 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
2153     }
2154     } else {
2155     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2156 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
2157     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2158     || $AttrStatus->{$attr_ns}->{''};
2159     $status = FEATURE_ALLOWED if not defined $status;
2160 wakaba 1.1 }
2161 wakaba 1.62
2162 wakaba 1.1 if ($checker) {
2163 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
2164 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
2165 wakaba 1.54 #
2166 wakaba 1.1 } else {
2167 wakaba 1.104 $self->{onerror}->(node => $attr,
2168     type => 'unknown attribute',
2169     level => $self->{level}->{uncertain});
2170 wakaba 1.49 ## ISSUE: No conformance createria for unknown attributes in the spec
2171     }
2172    
2173 wakaba 1.82 $self->_attr_status_info ($attr, $status);
2174 wakaba 1.1 }
2175    
2176     if (defined $name_attr) {
2177     if (defined $http_equiv_attr) {
2178     $self->{onerror}->(node => $http_equiv_attr,
2179 wakaba 1.104 type => 'attribute not allowed',
2180     level => $self->{level}->{must});
2181 wakaba 1.1 } elsif (defined $charset_attr) {
2182     $self->{onerror}->(node => $charset_attr,
2183 wakaba 1.104 type => 'attribute not allowed',
2184     level => $self->{level}->{must});
2185 wakaba 1.1 }
2186     my $metadata_name = $name_attr->value;
2187     my $metadata_value;
2188     if (defined $content_attr) {
2189     $metadata_value = $content_attr->value;
2190     } else {
2191 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2192 wakaba 1.104 type => 'attribute missing',
2193     text => 'content',
2194     level => $self->{level}->{must});
2195 wakaba 1.1 $metadata_value = '';
2196     }
2197     } elsif (defined $http_equiv_attr) {
2198     if (defined $charset_attr) {
2199     $self->{onerror}->(node => $charset_attr,
2200 wakaba 1.104 type => 'attribute not allowed',
2201     level => $self->{level}->{must});
2202 wakaba 1.1 }
2203     unless (defined $content_attr) {
2204 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2205 wakaba 1.104 type => 'attribute missing',
2206     text => 'content',
2207     level => $self->{level}->{must});
2208 wakaba 1.1 }
2209     } elsif (defined $charset_attr) {
2210     if (defined $content_attr) {
2211     $self->{onerror}->(node => $content_attr,
2212 wakaba 1.104 type => 'attribute not allowed',
2213     level => $self->{level}->{must});
2214 wakaba 1.1 }
2215     } else {
2216     if (defined $content_attr) {
2217     $self->{onerror}->(node => $content_attr,
2218 wakaba 1.104 type => 'attribute not allowed',
2219     level => $self->{level}->{must});
2220 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2221 wakaba 1.104 type => 'attribute missing:name|http-equiv',
2222     level => $self->{level}->{must});
2223 wakaba 1.1 } else {
2224 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2225 wakaba 1.104 type => 'attribute missing:name|http-equiv|charset',
2226     level => $self->{level}->{must});
2227 wakaba 1.1 }
2228     }
2229    
2230 wakaba 1.32 my $check_charset_decl = sub () {
2231 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
2232 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2233     for my $el (@{$parent->child_nodes}) {
2234     next unless $el->node_type == 1; # ELEMENT_NODE
2235 wakaba 1.40 unless ($el eq $item->{node}) {
2236 wakaba 1.29 ## NOTE: Not the first child element.
2237 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2238 wakaba 1.32 type => 'element not allowed:meta charset',
2239 wakaba 1.104 level => $self->{level}->{must});
2240 wakaba 1.29 }
2241     last;
2242     ## NOTE: Entity references are not supported.
2243     }
2244     } else {
2245 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2246 wakaba 1.32 type => 'element not allowed:meta charset',
2247 wakaba 1.104 level => $self->{level}->{must});
2248 wakaba 1.29 }
2249    
2250 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
2251     $self->{onerror}->(node => $item->{node},
2252 wakaba 1.32 type => 'in XML:charset',
2253 wakaba 1.104 level => $self->{level}->{must});
2254 wakaba 1.1 }
2255 wakaba 1.32 }; # $check_charset_decl
2256 wakaba 1.21
2257 wakaba 1.32 my $check_charset = sub ($$) {
2258     my ($attr, $charset_value) = @_;
2259 wakaba 1.21
2260 wakaba 1.91 my $charset;
2261     ($charset, $charset_value)
2262     = $HTMLCharsetChecker->($charset_value, $self, $attr);
2263    
2264 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
2265 wakaba 1.21 if (defined $ic) {
2266     ## TODO: Test for this case
2267     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2268     if ($charset ne $ic_charset) {
2269 wakaba 1.32 $self->{onerror}->(node => $attr,
2270 wakaba 1.104 type => 'mismatched charset name',
2271 wakaba 1.106 text => $ic,
2272 wakaba 1.104 value => $charset_value,
2273     level => $self->{level}->{must});
2274 wakaba 1.21 }
2275     } else {
2276     ## NOTE: MUST, but not checkable, since the document is not originally
2277     ## in serialized form (or the parser does not preserve the input
2278     ## encoding information).
2279 wakaba 1.32 $self->{onerror}->(node => $attr,
2280 wakaba 1.104 type => 'mismatched charset name not checked',
2281     value => $charset_value,
2282     level => $self->{level}->{uncertain});
2283 wakaba 1.21 }
2284    
2285 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
2286     $self->{onerror}->(node => $attr,
2287 wakaba 1.104 type => 'charref in charset',
2288     level => $self->{level}->{must},
2289     layer => 'syntax');
2290 wakaba 1.22 }
2291 wakaba 1.32 }; # $check_charset
2292    
2293     ## TODO: metadata conformance
2294    
2295     ## TODO: pragma conformance
2296     if (defined $http_equiv_attr) { ## An enumerated attribute
2297     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
2298 wakaba 1.33
2299 wakaba 1.85 if ($self->{has_http_equiv}->{$keyword}) {
2300     $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2301     node => $http_equiv_attr,
2302 wakaba 1.104 level => $self->{level}->{must});
2303 wakaba 1.85 } else {
2304     $self->{has_http_equiv}->{$keyword} = 1;
2305     }
2306    
2307     if ($keyword eq 'content-type') {
2308 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2309 wakaba 1.33
2310 wakaba 1.32 $check_charset_decl->();
2311     if ($content_attr) {
2312     my $content = $content_attr->value;
2313 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2314 wakaba 1.132 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2315 wakaba 1.58 =(.+)\z!sx) {
2316 wakaba 1.32 $check_charset->($content_attr, $1);
2317     } else {
2318     $self->{onerror}->(node => $content_attr,
2319     type => 'meta content-type syntax error',
2320 wakaba 1.104 level => $self->{level}->{must});
2321 wakaba 1.85 }
2322     }
2323     } elsif ($keyword eq 'default-style') {
2324     ## ISSUE: Not defined yet in the spec.
2325     } elsif ($keyword eq 'refresh') {
2326     if ($content_attr) {
2327     my $content = $content_attr->value;
2328     if ($content =~ /\A[0-9]+\z/) {
2329     ## NOTE: Valid non-negative integer.
2330     #
2331 wakaba 1.132 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2332 wakaba 1.85 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
2333     Whatpm::URIChecker->check_iri_reference ($content, sub {
2334 wakaba 1.104 $self->{onerror}->(value => $content, @_, node => $content_attr);
2335 wakaba 1.106 }, $self->{level});
2336 wakaba 1.85 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URIs".
2337    
2338     $element_state->{uri_info}->{content}->{node} = $content_attr;
2339     $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2340     ## TODO: absolute
2341     push @{$self->{return}->{uri}->{$content} ||= []},
2342     $element_state->{uri_info}->{content};
2343     } else {
2344     $self->{onerror}->(node => $content_attr,
2345     type => 'refresh:syntax error',
2346 wakaba 1.104 level => $self->{level}->{must});
2347 wakaba 1.32 }
2348     }
2349     } else {
2350     $self->{onerror}->(node => $http_equiv_attr,
2351 wakaba 1.104 type => 'enumerated:invalid',
2352     level => $self->{level}->{must});
2353 wakaba 1.32 }
2354     }
2355    
2356     if (defined $charset_attr) {
2357     $check_charset_decl->();
2358     $check_charset->($charset_attr, $charset_attr->value);
2359 wakaba 1.1 }
2360     },
2361     };
2362    
## |style| element.
##
## The |type| attribute (defaulting to |text/css|) is normalized at start
## time; it decides whether child elements are tolerated and, at end time,
## how the accumulated text content is handed to |onsubdoc|.
$Element->{$HTML_NS}{style} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
      media => $HTMLMQAttrChecker,
      scoped => $GetHTMLBooleanAttrChecker->('scoped'),
      ## NOTE: |title| has special semantics for |style|s, but is
      ## syntactically not different
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      disabled => FEATURE_XHTML2_ED,
      href => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
      id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      scoped => FEATURE_HTML5_FD,
      title => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    },
  ),

  ## Normalizes |type| and initializes the per-element state
  ## (|allow_element|, |style_type|, |uri_info|, |text|).
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on content
    ## model.
    my $declared = $item->{node}->get_attribute_ns (undef, 'type');
    $declared = 'text/css' unless defined $declared;

    ## NOTE: We don't know how parameters are handled by UAs.  According to
    ## HTML5 specification, <style> with unknown parameters in |type=""|
    ## must be ignored.  Anything that is not a bare "type/subtype" therefore
    ## leaves |$style_type| undefined.
    my $style_type;
    if ($declared =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      ## NOTE: ASCII case-insensitive.  Both captures contain ASCII
      ## characters only, so |lc| only touches A-Z here.
      $style_type = lc "$1/$2";
    }

    ## Child elements are rejected only for a recognized |text/css|; an
    ## invalid |type=""| or an unknown type lets them through.
    ## NOTE: There is no definition for "XML-based styling language" in
    ## HTML5, so types ending with |/xml| or |+xml| get no special treatment
    ## here.
    $element_state->{allow_element}
        = (defined $style_type and $style_type eq 'text/css') ? 0 : 1;
    $element_state->{style_type} = $style_type;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;

    $element_state->{text} = '';
  },

  ## Reports child elements that are excluded, unless explicitly allowed by
  ## |plus_elements| or by the |allow_element| state set in |check_start|.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } else {
      my $tolerated = $self->{plus_elements}->{$child_nsuri}->{$child_ln}
          || $element_state->{allow_element};
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must})
          unless $tolerated;
    }
  },

  ## Accumulates the character data for the end-of-element check.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $child_node->data;
  },

  ## Passes the collected text to |onsubdoc| (or reports an XML-based style
  ## language), then runs the generic end-of-element check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $style_type = $element_state->{style_type};
    if (not defined $style_type) {
      ## NOTE: Invalid type=""; nothing to check.
    } elsif ($style_type ne 'text/css' and
             $style_type =~ m![+/][Xx][Mm][Ll]\z!) {
      ## NOTE: XML content should be checked by THIS instance of checker
      ## as part of normal tree validation.  However, we don't know of any
      ## XML-based styling language that can be used in HTML <style>
      ## element, such that we throw a "style language not supported" error.
      $self->{onerror}->(node => $item->{node},
                         type => 'XML style lang',
                         text => $style_type,
                         level => $self->{level}->{uncertain});
    } else {
      ## |text/css| and every other non-XML type are forwarded as is.
      ## NOTE: Should we raise some kind of error for,
      ## say, <style type="text/plaion">?
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => $style_type,
                           is_char_string => 1});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
2468 wakaba 1.25 ## ISSUE: Relationship to significant content check?
2469 wakaba 1.1
## |body| element: built on %HTMLFlowContentChecker, plus the legacy
## |alink|, |background|, |bgcolor|, |link|, |text| and |vlink| attributes.
$Element->{$HTML_NS}{body} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      background => $HTMLURIAttrChecker,
      (map { ($_ => $HTMLColorAttrChecker) }
           qw/alink bgcolor link text vlink/),
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
           qw/alink background bgcolor link text vlink/),
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),

  ## Records the URI types of the |background|, |template| and |ref|
  ## attributes in the element state.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    for my $pair ([background => 'embedded'],
                  [template => 'resource'],
                  [ref => 'resource']) {
      my ($attr_name, $uri_type) = @$pair;
      $element_state->{uri_info}->{$attr_name}->{type}->{$uri_type} = 1;
    }
  },
};
2501    
## |section| element: built on %HTMLFlowContentChecker, with no
## element-specific attribute checkers.
$Element->{$HTML_NS}{section} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute value checkers
    {%HTMLAttrStatus, %XHTML2CommonAttrStatus},
  ),
};
2511    
## |nav| element: built on %HTMLFlowContentChecker.
## NOTE: |status| precedes the spread, so a |status| entry in
## %HTMLFlowContentChecker (if there is one) would replace it.
$Element->{$HTML_NS}{nav} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
};
2516    
## |article| element: built on %HTMLFlowContentChecker, plus the |pubdate|
## attribute.
$Element->{$HTML_NS}{article} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      pubdate => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    { ## Attribute status
      %HTMLAttrStatus,
      # XXX cite
      pubdate => FEATURE_HTML5_LC,
    },
  ),
}; # article
2528 wakaba 1.1
## |blockquote| element: built on %HTMLFlowContentChecker, plus the |cite|
## URI attribute.
## NOTE: |status| precedes the spread, so a |status| entry in
## %HTMLFlowContentChecker (if there is one) would replace it.
$Element->{$HTML_NS}{blockquote} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLFlowContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      cite => $HTMLURIAttrChecker,
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      cite => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),

  ## Records the URI types of the |cite|, |template| and |ref| attributes
  ## in the element state.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    for my $pair ([cite => 'cite'],
                  [template => 'resource'],
                  [ref => 'resource']) {
      my ($attr_name, $uri_type) = @$pair;
      $element_state->{uri_info}->{$attr_name}->{type}->{$uri_type} = 1;
    }
  },
};
2550    
## |aside| element: built on %HTMLFlowContentChecker.
## NOTE: |status| precedes the spread, so a |status| entry in
## %HTMLFlowContentChecker (if there is one) would replace it.
$Element->{$HTML_NS}{aside} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
};
2555    
## |h1| element: built on %HTMLPhrasingContentChecker, plus the legacy
## |align| attribute.  Seeing the element sets the checker-wide |has_hn|
## flag.
$Element->{$HTML_NS}{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      align => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw/left center right justify/
      }),
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),

  ## Flags that a heading element has been seen and records the URI types
  ## of the |template| and |ref| attributes.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->{flag}->{has_hn} = 1;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
};
2578    
## |h2| .. |h6| each get their own shallow copy of the |h1| definition:
## the top-level hash is new, the values inside it are shared with |h1|.
for my $level (2 .. 6) {
  $Element->{$HTML_NS}->{"h$level"} = { %{ $Element->{$HTML_NS}->{h1} } };
}
2588 wakaba 1.1
2589 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
2590 wakaba 1.174
2591     # XXX footer in header is disallowed (HTML5 revision 3050)
2592 wakaba 1.29
## |header| element: built on %HTMLFlowContentChecker.  |header|, |footer|
## and the sets passed as $HTMLSectioningContent are registered as excluded
## elements while the element is open, and at least one heading element
## (tracked through the |has_hn| flag) is required inside.
## NOTE: |status| precedes the spread, so a |status| entry in
## %HTMLFlowContentChecker (if there is one) would replace it.
$Element->{$HTML_NS}{header} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,

  ## Registers the exclusions, then saves and resets |has_hn| so that only
  ## headings inside this element count.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $excluded = {$HTML_NS => {header => 1, footer => 1}};
    $self->_add_minus_elements ($element_state, $excluded,
                                $HTMLSectioningContent);

    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },

  ## Drops the exclusions, reports a missing heading, and restores the
  ## outer |has_hn| value when no heading was seen inside.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    if (not $self->{flag}->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn',
                         level => $self->{level}->{must});
    }
    $self->{flag}->{has_hn} = $element_state->{has_hn_original}
        unless $self->{flag}->{has_hn};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
2621    
## |footer| element: built on %HTMLFlowContentChecker.  |footer| and the
## sets passed as $HTMLSectioningContent and $HTMLHeadingContent are
## registered as excluded elements while the element is open.
## NOTE: |status| precedes the spread, so a |status| entry in
## %HTMLFlowContentChecker (if there is one) would replace it.
$Element->{$HTML_NS}{footer} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,

  ## Registers the exclusions and records the URI types of the |template|
  ## and |ref| attributes.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $excluded = {$HTML_NS => {footer => 1}};
    $self->_add_minus_elements ($element_state, $excluded,
                                $HTMLSectioningContent,
                                $HTMLHeadingContent);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },

  ## Drops the exclusions, then runs the flow-content end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2642    
## |address| element: built on %HTMLFlowContentChecker.  |footer|,
## |address| and the sets passed as $HTMLSectioningContent and
## $HTMLHeadingContent are registered as excluded elements while the
## element is open.
$Element->{$HTML_NS}{address} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers (none enabled)
      ## TODO: add test
      #align => $GetHTMLEnumeratedAttrChecker->({
      #  left => 1, center => 1, right => 1, justify => 1,
      #}),
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),

  ## Registers the exclusions and records the URI types of the |template|
  ## and |ref| attributes.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $excluded = {$HTML_NS => {footer => 1, address => 1}};
    $self->_add_minus_elements ($element_state, $excluded,
                                $HTMLSectioningContent, $HTMLHeadingContent);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },

  ## Drops the exclusions, then runs the flow-content end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2675    
## |p| element: built on %HTMLPhrasingContentChecker, plus the legacy
## |align| attribute.
$Element->{$HTML_NS}{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      align => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw/left center right justify/
      }),
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2691    
## |hr| element: built on %HTMLEmptyChecker.  No attribute value checkers
## are registered yet; only the status of the legacy attributes is listed.
$Element->{$HTML_NS}{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      ## TODO: HTML4 |align|, |noshade|, |size|, |width|
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      noshade => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      size => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
2708    
## |br| element: built on %HTMLEmptyChecker, plus the legacy |clear|
## attribute.
$Element->{$HTML_NS}{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      clear => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw/left all right none/
      }),
    },
    { ## Attribute status; later keys override the spread table
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      clear => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    },
  ),
  ## NOTE: Blank line MUST NOT be used for presentation purpose.
  ## (This requirement is semantic so that we cannot check.)
};
2728    
## |dialog| element: content must be a sequence of |dt|, |dd| pairs.
##
## The |phase| state is either 'before dt' or 'before dd' and names the
## child element expected next.
## NOTE: |status| precedes the spread, so a |status| entry in %HTMLChecker
## (if there is one) would replace it.
$Element->{$HTML_NS}{dialog} = {
  status => FEATURE_HTML5_WD,
  %HTMLChecker,

  ## Starts in the 'before dt' phase and records the URI types of the
  ## |template| and |ref| attributes.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },

  ## Checks one child element against the current phase.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; the phase is not affected.
    } else {
      ## Map the current phase to the expected child element name.
      my $expected = {
        'before dt' => 'dt',
        'before dd' => 'dd',
      }->{$element_state->{phase}};
      die "check_child_element: Bad |dialog| phase: $element_state->{phase}"
          unless defined $expected;
      my $counterpart = $expected eq 'dt' ? 'dd' : 'dt';

      my $in_html_ns = $child_nsuri eq $HTML_NS;
      if ($in_html_ns and $child_ln eq $expected) {
        ## The expected element: wait for its counterpart next.
        $element_state->{phase} = "before $counterpart";
      } elsif ($in_html_ns and $child_ln eq $counterpart) {
        ## The counterpart arrived first: report the missing element and
        ## stay in the current phase.
        $self->{onerror}
            ->(node => $child_el, type => 'ps element missing',
               text => $expected,
               level => $self->{level}->{must});
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },

  ## Significant text is not allowed as a direct child.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },

  ## A trailing |dt| without its |dd| is an error; then the generic
  ## end-of-element check runs.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'dd',
                       level => $self->{level}->{must})
        if $element_state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  },
};
2798    
## |pre| element: built on %HTMLPhrasingContentChecker, plus the legacy
## |width| attribute.  When the element carries the |idl| or |idl-code|
## class, its text content is additionally handed to |onsubdoc| as
## |text/x-webidl|.
$Element->{$HTML_NS}->{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),

  ## Submits IDL fragments for checking, then runs the phrasing-content
  ## end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## TODO: Flag to enable/disable IDL checking?
    my $class = $item->{node}->get_attribute_ns (undef, 'class');

    ## The |class| value is treated as a list of tokens separated by the
    ## same space characters used by other checks in this file.  A missing
    ## attribute (undef) yields no tokens.  Matching whole tokens avoids
    ## false positives such as class="non-idl" or class="idl-codefoo",
    ## which a |\b|-anchored pattern would accept.
    my $is_idl = 0;
    if (defined $class) {
      $is_idl = grep { $_ eq 'idl' or $_ eq 'idl-code' }
          split /[\x09\x0A\x0C\x0D\x20]+/, $class;
    }

    if ($is_idl) {
      ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
      ## NOTE: pre.code > code.idl-code: WebIDL spec
      ## NOTE: pre.idl-code: DOM1 spec
      ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
      ## NOTE: pre.schema: ReSpec-generated specs
      $self->{onsubdoc}->({s => $item->{node}->text_content,
                           container_node => $item->{node},
                           media_type => 'text/x-webidl',
                           is_char_string => 1});
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2831    
## |ol| element: built on %HTMLChecker.  Only |li| children (and elements
## explicitly allowed through |plus_elements|) are accepted; significant
## text is rejected.
$Element->{$HTML_NS}{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      compact => $GetHTMLBooleanAttrChecker->('compact'),
      reversed => $GetHTMLBooleanAttrChecker->('reversed'),
      start => $HTMLIntegerAttrChecker,
      ## TODO: HTML4 |type|
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      reversed => FEATURE_HTML5_WD,
      sdaform => FEATURE_HTML20_RFC,
      #start => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
      start => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),

  ## Reports excluded children and anything that is neither explicitly
  ## allowed nor an HTML |li|.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } else {
      my $acceptable = $self->{plus_elements}->{$child_nsuri}->{$child_ln}
          || ($child_nsuri eq $HTML_NS and $child_ln eq 'li');
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must})
          unless $acceptable;
    }
  },

  ## Significant text is not allowed as a direct child.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
2877    
## |ul| element: reuses every entry of the |ol| definition, then replaces
## |status| and |check_attrs| with its own.
$Element->{$HTML_NS}{ul} = {
  %{ $Element->{$HTML_NS}->{ol} },
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      compact => $GetHTMLBooleanAttrChecker->('compact'),
      ## TODO: HTML4 |type|
      ## TODO: sdaform, align
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      type => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
2895    
## |dir| element: reuses every entry of the |ul| definition, then replaces
## |status| and |check_attrs| with its own.
$Element->{$HTML_NS}{dir} = {
  ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
  %{ $Element->{$HTML_NS}->{ul} },
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
};
2912    
## |li| element: built on %HTMLFlowContentChecker.  Children are checked
## with the phrasing-content callbacks while the |in_menu| flag is set and
## with the flow-content callbacks otherwise.
$Element->{$HTML_NS}{li} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      ## TODO: HTML4 |type|

      ## |value|: reported when the parent element is not an HTML |ol|,
      ## then checked with $HTMLIntegerAttrChecker.
      value => sub {
        my ($self, $attr) = @_;

        my $list_el = $attr->owner_element->manakai_parent_element;
        my $in_ol = 0;
        if (defined $list_el) {
          my $list_ns = $list_el->namespace_uri;
          $list_ns = '' unless defined $list_ns;
          my $list_ln = $list_el->manakai_local_name;
          $in_ol = ($list_ns eq $HTML_NS and $list_ln eq 'ol');
        }

        if (not $in_ol) {
          ## ISSUE: No "MUST" in the spec.
          $self->{onerror}->(node => $attr,
                             type => 'non-ol li value',
                             level => $self->{level}->{html5_fact});
        }

        $HTMLIntegerAttrChecker->($self, $attr);
      },
    },
    { ## Attribute status; later keys override the spread tables
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      type => FEATURE_M12N10_REC_DEPRECATED,
      #value => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
      #    FEATURE_M12N10_REC_DEPRECATED,
      value => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
          FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
    },
  ),

  ## Delegates to the phrasing or flow callback with the unchanged
  ## argument list.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## TODO: In <dir> element, then ...
    my $delegate = $self->{flag}->{in_menu}
        ? $HTMLPhrasingContentChecker{check_child_element}
        : $HTMLFlowContentChecker{check_child_element};
    $delegate->(@_);
  },

  ## Delegates to the phrasing or flow callback with the unchanged
  ## argument list.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## TODO: In <dir> element, then ...
    my $delegate = $self->{flag}->{in_menu}
        ? $HTMLPhrasingContentChecker{check_child_text}
        : $HTMLFlowContentChecker{check_child_text};
    $delegate->(@_);
  },
};
2971    
## |dl|: zero or more groups, each being one or more |dt| children
## followed by one or more |dd| children.  The position within that
## grammar is kept in |$element_state->{phase}|, which is one of
## 'before dt', 'in dts' or 'in dds'.
$Element->{$HTML_NS}->{dl} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { # element-specific attribute checkers
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    { # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
      type    => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## No |dt| has been seen yet.
    $element_state->{phase} = 'before dt';

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted, phase left untouched.
    } else {
      my $phase = $element_state->{phase};
      unless ($phase eq 'in dds' or
              $phase eq 'in dts' or
              $phase eq 'before dt') {
        die "check_child_element: Bad |dl| phase: $element_state->{phase}";
      }

      ## In every valid phase a |dt| moves to 'in dts', a |dd| moves to
      ## 'in dds', and anything else is an error that keeps the phase.
      ## The only phase-specific rule is that a |dd| arriving before any
      ## |dt| is additionally reported.
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
        $element_state->{phase} = 'in dts';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
        if ($phase eq 'before dt') {
          $self->{onerror}->(node => $child_el,
                             type => 'ps element missing',
                             text => 'dt',
                             level => $self->{level}->{must});
        }
        $element_state->{phase} = 'in dds';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only insignificant text is tolerated directly inside |dl|.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing run of |dt| elements lacks its |dd|.
    if ($element_state->{phase} eq 'in dts') {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'dd',
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
3057    
## |dt|: children are checked as phrasing content.
$Element->{$HTML_NS}->{dt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3068    
## |dd|: children are checked as flow content.
$Element->{$HTML_NS}->{dd} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3079    
## |a|: transparent content model with interactive content excluded
## from its descendants (see |check_start|).
##
## |check_attrs| validates every attribute, then enforces that |target|,
## |ping|, |rel|, |media|, |hreflang| and |type| are only used together
## with |href|.  While the element is open and has |href|,
## |$self->{flag}->{in_a_href}| is set; |check_end| restores the
## previous state.
$Element->{$HTML_NS}->{a} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Lookup tables for null-namespace attributes.  Neither depends on
    ## the attribute currently being examined, so they are built once
    ## per element instead of once per attribute.
    my %a_attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_WD,
      rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    );
    my %a_attr_checker = (
      charset => sub {
        my ($self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      ## TODO: HTML4 |coords|
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      ## TODO: HTML4 |shape|
      media => $HTMLMQAttrChecker,
      ## TODO: HTML4/XHTML1 |name|
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    );

    ## Attributes that have an |a|-specific checker, by local name;
    ## consulted after the loop for the |href| dependency check.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $status = $a_attr_status{$attr_ln};
        $checker = $a_attr_checker{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the namespace-wide tables; the '' key is the
      ## per-namespace default entry.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## No checker and no status: nothing is reported here.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    ## Remember the enclosing state so that |check_end| can restore it.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## These attributes are only allowed together with |href|.
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## Clear the flag unless it was already set by an enclosing element.
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLTransparentChecker{check_end}->(@_);
  },
};
3202    
## |q|: phrasing content with an optional |cite| URI.
## NOTE(review): |status| is listed before the shared checker here,
## unlike most sibling definitions; should the shared hash ever carry
## its own |status| key, that later value would win.  Confirm intended.
$Element->{$HTML_NS}->{q} = {
  status => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    { # element-specific attribute checkers
      cite => $HTMLURIAttrChecker,
    },
    { # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      cite    => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{cite}->{type}->{cite} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
## TODO: "Quotation punctuation (such as quotation marks), if any, must be
## placed inside the <code>q</code> element."  Though we cannot test the
## element against this requirement since it includes a semantic bit,
## it might be possible to inform of the existence of quotation marks
## OUTSIDE the |q| element.
3229 wakaba 1.1
## |cite|: children are checked as phrasing content.
$Element->{$HTML_NS}->{cite} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3240    
## |em|: children are checked as phrasing content.
$Element->{$HTML_NS}->{em} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3251    
## |strong|: children are checked as phrasing content.
$Element->{$HTML_NS}->{strong} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3262    
## |small|: children are checked as phrasing content.
$Element->{$HTML_NS}->{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3272    
## |big|: children are checked as phrasing content.  The element's
## status carries no HTML5 flag.
$Element->{$HTML_NS}->{big} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3282    
## |mark|: children are checked as phrasing content; no attribute
## checker of its own is defined here.
## NOTE(review): |status| precedes the shared checker, so a |status|
## key inside the shared hash (if any) would take precedence.
$Element->{$HTML_NS}->{mark} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
};
3287    
## |dfn|: phrasing content that must not contain another |dfn|.
##
## |check_start| also determines the term being defined and appends the
## element to |$self->{term}->{$term}| (an array reference of |dfn|
## nodes sharing the same term).  The term is, in order of preference:
##   1. the |title| attribute of the |dfn| itself;
##   2. the |title| attribute of an HTML |abbr| child, provided that
##      child is the ONLY element child and there is no child text other
##      than inter-element white space;
##   3. the text content of the |dfn|.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      ## Every element child is counted, not only |abbr| ones.
      ## Otherwise a non-|abbr| element followed by an |abbr| with
      ## |title| would wrongly satisfy the "exactly one element child"
      ## condition.
      my $element_children = 0;
      for my $child (@{$node->child_nodes}) {
        my $type = $child->node_type;
        if ($type == 1) { # ELEMENT_NODE
          if (++$element_children > 1) {
            ## More than one element child: rule 2 does not apply.
            undef $term;
            last;
          }
          next unless $child->manakai_local_name eq 'abbr';
          my $nsuri = $child->namespace_uri;
          next unless defined $nsuri and $nsuri eq $HTML_NS;
          my $attr = $child->get_attribute_node_ns (undef, 'title');
          $term = $attr->value if $attr;
        } elsif ($type == 3 or $type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          if ($child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/) { # Inter-element whitespace
            next;
          }
          ## Significant child text: rule 2 does not apply.
          undef $term;
          last;
        }
      }
      unless (defined $term) {
        $term = $node->text_content;
      }
    }
    push @{$self->{term}->{$term} ||= []}, $node;
    ## ISSUE: The HTML5 definition for the defined term does not work with
    ## |ruby| unless |dfn| has |title|.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3348    
## |abbr|: children are checked as phrasing content.
$Element->{$HTML_NS}->{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      full => FEATURE_XHTML2_ED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  ## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
  ## number (plural vs singular) must match the grammatical number of the
  ## contents of the element."  Though this can be checked by machine,
  ## it requires language-specific knowledge and a dictionary, so we
  ## do not support checking this requirement.
  ## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
};
3365    
## |acronym|: children are checked as phrasing content.  The element's
## status carries no HTML5 flag.
$Element->{$HTML_NS}->{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3375    
## |time|: phrasing content carrying a date-time or a time of day.
##
## The value is validated in |check_end| rather than in the attribute
## checker, because it is taken from |datetime| when that attribute is
## present and from the element's text content otherwise.  Two forms
## are accepted: "YYYY-MM-DD [T] hh:mm[:ss[.f]] [Z | (+|-)hh:mm]" and
## "hh:mm[:ss[.f]]".  All problems are reported at the |must| level
## against the node that supplied the value.
$Element->{$HTML_NS}->{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    { # element-specific attribute checkers
      datetime => sub { 1 }, # checked in |checker|
    },
    { # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      datetime => FEATURE_HTML5_FD,
    },
  ),
  ## TODO: Update definition
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Pick the value to validate, the node to blame, and the pattern
    ## for skippable white space (which differs between the sources).
    my $el = $item->{node};
    my $datetime_attr = $el->get_attribute_node_ns (undef, 'datetime');
    my $input;
    my $reg_sp;
    my $input_node;
    if ($datetime_attr) {
      $input_node = $datetime_attr;
      $input = $datetime_attr->value;
      $reg_sp = qr/[\x09\x0A\x0C\x0D\x20]*/;
    } else {
      $input_node = $el;
      $input = $el->text_content;
      $reg_sp = qr/\p{WhiteSpace}*/;

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## returns some date or time, is it "successfully"?
    }

    ## Reports one problem of the given type.
    my $report = sub {
      $self->{onerror}->(node => $input_node,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    ## |$first| is the year in the date-time form and the hour in the
    ## time-only form.  |$month| is defined only in the date-time form.
    if (my ($first, $month, $day, $dt_hour, $dt_minute, $dt_second,
            $tz_hour, $tz_minute, $t_minute, $t_second) = $input =~ /
        \A
        $reg_sp
        ([0-9]+) # 1
        (?>
          -([0-9]+) # 2
          -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
          $reg_sp
          (?>
            T
            $reg_sp
          )?
          ([0-9]+) # 4
          :([0-9]+) # 5
          (?>
            :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
          )?
          $reg_sp
          (?>
            Z
            $reg_sp
          |
            [+-]([0-9]+):([0-9]+) # 7, 8
            $reg_sp
          )?
          \z
        |
          :([0-9]+) # 9
          (?>
            :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
          )?
          $reg_sp
          \z
        )
    /x) {
      my $hour;
      my $minute;
      my $second;
      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        $report->('dateortime:syntax error')
            if length ($first) != 4 or length ($month) != 2 or
               length ($day) != 2 or
               length ($dt_hour) != 2 or length ($dt_minute) != 2;

        if (1 <= $month and $month <= 12) {
          ## Longest possible length of each month (index 0 unused).
          my $max_day
              = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$month];
          $report->('datetime:bad day') if $day < 1 or $day > $max_day;

          ## February 29 is only valid in Gregorian leap years.
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and
                 not ($first % 400 == 0 or
                      ($first % 4 == 0 and $first % 100 != 0));
        } else {
          $report->('datetime:bad month');
        }

        $hour = $dt_hour;
        $minute = $dt_minute;
        $second = $dt_second;

        if (defined $tz_hour) { ## [+-]hh:mm
          $report->('dateortime:syntax error')
              if length ($tz_hour) != 2 or length ($tz_minute) != 2;
          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        $report->('dateortime:syntax error')
            if length ($first) != 2 or length ($t_minute) != 2;

        $hour = $first;
        $minute = $t_minute;
        $second = $t_second;
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: The integer part of the seconds does not have to be
        ## two digits long, but it must not be missing.
        $report->('dateortime:syntax error')
            if substr ($second, 0, 1) eq '.';
        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3524    
## |meter|: phrasing content with six floating-point number attributes.
## Each number is accepted unconditionally by its range callback
## (|sub { 1 }|); relations between the values are not checked yet.
$Element->{$HTML_NS}->{meter} = {
  ## TODO: "The recommended way of giving the value is to include it as
  ## contents of the element"
  ## TODO: value inequalities (HTML5 revision 1463)
  ## TODO: content checking
  ## TODO: content or value must contain number (rev 2053)
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    { # element-specific attribute checkers
      value   => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      min     => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      low     => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      high    => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      max     => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
    },
    { # attribute name => feature status
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT) }
           qw/high low max min optimum value/),
    },
  ),
};
3548    
## |progress|: phrasing content.  |value| must be a floating-point
## number >= 0 and |max| a floating-point number > 0.
## TODO: recommended to use content
$Element->{$HTML_NS}->{progress} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    { # element-specific attribute checkers
      value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
      max   => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
    },
    { # attribute name => feature status
      %HTMLAttrStatus,
      max   => FEATURE_HTML5_DEFAULT,
      value => FEATURE_HTML5_DEFAULT,
    },
  ),
};
3561    
## |code|: children are checked as phrasing content.
$Element->{$HTML_NS}->{code} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3572    
## |var|: children are checked as phrasing content.
$Element->{$HTML_NS}->{var} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3583    
## |samp|: children are checked as phrasing content.
$Element->{$HTML_NS}->{samp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3594    
## |kbd|: children are checked as phrasing content.
$Element->{$HTML_NS}->{kbd} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3605    
## |sub|: children are checked as phrasing content.
$Element->{$HTML_NS}->{sub} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
    },
  ),
};

## |sup| reuses the very same definition hash as |sub| (same reference,
## not a copy).
$Element->{$HTML_NS}->{sup} = $Element->{$HTML_NS}->{sub};
3618 wakaba 1.1
## |span|: children are checked as phrasing content.  The three
## |data*| attributes only have the HTML4 "reserved" status.
$Element->{$HTML_NS}->{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      datafld      => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc      => FEATURE_HTML4_REC_RESERVED,
      lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform      => FEATURE_HTML2X_RFC,
    },
  ),
};
3632    
## |i|: children are checked as phrasing content.
$Element->{$HTML_NS}->{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};

## |b| reuses the very same definition hash as |i| (same reference,
## not a copy).
$Element->{$HTML_NS}->{b} = $Element->{$HTML_NS}->{i};
3645    
## |tt|: children are checked as phrasing content.  The element's
## status carries no HTML5 flag.
$Element->{$HTML_NS}->{tt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3656 wakaba 1.51
## |s|: children are checked as phrasing content.  The element's status
## is the deprecated M12N one, with no HTML5 flag.
$Element->{$HTML_NS}->{s} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {   # attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};

## |strike| and |u| reuse the very same definition hash as |s| (same
## reference, not a copy).
$Element->{$HTML_NS}->{strike} = $Element->{$HTML_NS}->{s};

$Element->{$HTML_NS}->{u} = $Element->{$HTML_NS}->{s};
3670    
## |bdo|: phrasing content.  After the generic attribute check, the
## |dir| attribute is required: its absence is reported at the |must|
## level against the element.
$Element->{$HTML_NS}->{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## The generic checker is created on every call and run right away.
    my $generic_check = $GetHTMLAttrsChecker->(
      {}, # no element-specific attribute checkers
      {   # attribute name => feature status
        %HTMLAttrStatus,
        class   => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        dir     => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        id      => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        style   => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
        title   => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
        sdapref => FEATURE_HTML2X_RFC,
        sdasuff => FEATURE_HTML2X_RFC,
      },
    );
    $generic_check->($self, $item, $element_state);

    my $el = $item->{node};
    if (not $el->has_attribute_ns (undef, 'dir')) {
      $self->{onerror}->(node => $el,
                         type => 'attribute missing',
                         text => 'dir',
                         level => $self->{level}->{must});
    }
  },
  ## ISSUE: The spec does not directly say that |dir| is an enumerated
  ## attribute.
};
3696    
3697 wakaba 1.99 $Element->{$HTML_NS}->{ruby} = {
3698     %HTMLPhrasingContentChecker,
3699     status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
3700     check_attrs => $GetHTMLAttrsChecker->({}, {
3701     %HTMLAttrStatus,
3702     %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
3703 wakaba 1.153 lang => FEATURE_HTML5_WD,
3704 wakaba 1.99 }),
3705     check_start => sub {
3706     my ($self, $item, $element_state) = @_;
3707    
3708     $element_state->{phase} = 'before-rb';
3709     #$element_state->{has_sig}
3710 wakaba 1.100
3711     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3712     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3713 wakaba 1.99 },
3714     ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
3715     check_child_element => sub {
3716     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3717     $child_is_transparent, $element_state) = @_;
3718 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
3719     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
3720 wakaba 1.99 $self->{onerror}->(node => $child_el,
3721     type => 'element not allowed:minus',
3722 wakaba 1.104 level => $self->{level}->{must});
3723 wakaba 1.99 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3724     #
3725     } elsif ($element_state->{phase} eq 'before-rb') {
3726     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3727     $element_state->{phase} = 'in-rb';
3728     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3729     $self->{onerror}->(node => $child_el,
3730 wakaba 1.104 level => $self->{level}->{should},
3731     type => 'no significant content before');
3732 wakaba 1.99 $element_state->{phase} = 'after-rt';
3733     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3734     $self->{onerror}->(node => $child_el,
3735 wakaba 1.104 level => $self->{level}->{should},
3736     type => 'no significant content before');
3737 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3738     } else {
3739     $self->{onerror}->(node => $child_el,
3740 wakaba 1.104 type => 'element not allowed:ruby base',
3741     level => $self->{level}->{must});
3742 wakaba 1.99 $element_state->{phase} = 'in-rb';
3743     }
3744     } elsif ($element_state->{phase} eq 'in-rb') {
3745     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3746     #$element_state->{phase} = 'in-rb';
3747     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3748     unless ($element_state->{has_significant}) {
3749     $self->{onerror}->(node => $child_el,
3750 wakaba 1.104 level => $self->{level}->{should},
3751     type => 'no significant content before');
3752 wakaba 1.99 }
3753     $element_state->{phase} = 'after-rt';
3754     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3755     unless ($element_state->{has_significant}) {
3756     $self->{onerror}->(node => $child_el,
3757 wakaba 1.104 level => $self->{level}->{should},
3758     type => 'no significant content before');
3759 wakaba 1.99 }
3760     $element_state->{phase} = 'after-rp1';
3761     } else {
3762     $self->{onerror}->(node => $child_el,
3763 wakaba 1.104 type => 'element not allowed:ruby base',
3764     level => $self->{level}->{must});
3765 wakaba 1.99 #$element_state->{phase} = 'in-rb';
3766     }
3767     } elsif ($element_state->{phase} eq 'after-rt') {
3768     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3769     if ($element_state->{has_significant}) {
3770     $element_state->{has_sig} = 1;
3771     delete $element_state->{has_significant};
3772     }
3773     $element_state->{phase} = 'in-rb';
3774     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3775     $self->{onerror}->(node => $child_el,
3776 wakaba 1.104 level => $self->{level}->{should},
3777     type => 'no significant content before');
3778 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3779     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3780     $self->{onerror}->(node => $child_el,
3781 wakaba 1.104 level => $self->{level}->{should},
3782     type => 'no significant content before');
3783 wakaba 1.99 #$element_state->{phase} = 'after-rt';
3784     } else {
3785     $self->{onerror}->(node => $child_el,
3786 wakaba 1.104 type => 'element not allowed:ruby base',
3787     level => $self->{level}->{must});
3788 wakaba 1.99 if ($element_state->{has_significant}) {
3789     $element_state->{has_sig} = 1;
3790     delete $element_state->{has_significant};
3791     }
3792     $element_state->{phase} = 'in-rb';
3793     }
3794     } elsif ($element_state->{phase} eq 'after-rp1') {
3795     if ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3796     $element_state->{phase} = 'after-rp-rt';
3797     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3798     $self->{onerror}->(node => $child_el,
3799 wakaba 1.104 type => 'ps element missing',
3800     text => 'rt',
3801     level => $self->{level}->{must});
3802 wakaba 1.99 $element_state->{phase} = 'after-rp2';
3803     } else {
3804     $self->{onerror}->(node => $child_el,
3805 wakaba 1.104 type => 'ps element missing',
3806     text => 'rt',
3807     level => $self->{level}->{must});
3808 wakaba 1.99 $self->{onerror}->(node => $child_el,
3809 wakaba 1.104 type => 'ps element missing',
3810     text => 'rp',
3811     level => $self->{level}->{must});
3812 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3813     $self->{onerror}->(node => $child_el,
3814 wakaba 1.104 type => 'element not allowed:ruby base',
3815     level => $self->{level}->{must});
3816 wakaba 1.99 }
3817     if ($element_state->{has_significant}) {
3818     $element_state->{has_sig} = 1;
3819     delete $element_state->{has_significant};
3820     }
3821     $element_state->{phase} = 'in-rb';
3822     }
3823     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3824     if ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3825     $element_state->{phase} = 'after-rp2';
3826     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3827     $self->{onerror}->(node => $child_el,
3828 wakaba 1.104 type => 'ps element missing',
3829     text => 'rp',
3830     level => $self->{level}->{must});
3831 wakaba 1.99 $self->{onerror}->(node => $child_el,
3832 wakaba 1.104 level => $self->{level}->{should},
3833     type => 'no significant content before');
3834 wakaba 1.99 $element_state->{phase} = 'after-rt';
3835     } else {
3836     $self->{onerror}->(node => $child_el,
3837 wakaba 1.104 type => 'ps element missing',
3838     text => 'rp',
3839     level => $self->{level}->{must});
3840 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3841     $self->{onerror}->(node => $child_el,
3842 wakaba 1.104 type => 'element not allowed:ruby base',
3843     level => $self->{level}->{must});
3844 wakaba 1.99 }
3845     if ($element_state->{has_significant}) {
3846     $element_state->{has_sig} = 1;
3847     delete $element_state->{has_significant};
3848     }
3849     $element_state->{phase} = 'in-rb';
3850     }
3851     } elsif ($element_state->{phase} eq 'after-rp2') {
3852     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3853     if ($element_state->{has_significant}) {
3854     $element_state->{has_sig} = 1;
3855     delete $element_state->{has_significant};
3856     }
3857     $element_state->{phase} = 'in-rb';
3858     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3859     $self->{onerror}->(node => $child_el,
3860 wakaba 1.104 level => $self->{level}->{should},
3861     type => 'no significant content before');
3862 wakaba 1.99 $element_state->{phase} = 'after-rt';
3863     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3864     $self->{onerror}->(node => $child_el,
3865 wakaba 1.104 level => $self->{level}->{should},
3866     type => 'no significant content before');
3867 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3868     } else {
3869     $self->{onerror}->(node => $child_el,
3870 wakaba 1.104 type => 'element not allowed:ruby base',
3871     level => $self->{level}->{must});
3872 wakaba 1.99 if ($element_state->{has_significant}) {
3873     $element_state->{has_sig} = 1;
3874     delete $element_state->{has_significant};
3875     }
3876     $element_state->{phase} = 'in-rb';
3877     }
3878     } else {
3879     die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
3880     }
3881     },
## Text-node callback of the |ruby| checker.  Text with significant
## content always moves the state machine to the |in-rb| phase; if
## the current phase was still waiting for |rt| and/or |rp|, one
## "ps element missing" error per missing element is reported first.
check_child_text => sub {
  my ($self, $item, $child_node, $has_significant, $element_state) = @_;

  ## Text without significant content never changes the phase.
  return unless $has_significant;

  ## Phase => local names of the elements that are reported as
  ## missing when significant text is seen in that phase.
  my $missing_by_phase = {
    'before-rb'   => [],
    'in-rb'       => [],
    'after-rt'    => [],
    'after-rp2'   => [],
    'after-rp1'   => ['rt', 'rp'],
    'after-rp-rt' => ['rp'],
  };

  my $missing_lns = $missing_by_phase->{$element_state->{phase}}
      or die "check_child_text: Bad |ruby| phase: $element_state->{phase}";

  for my $missing_ln (@$missing_lns) {
    $self->{onerror}->(node => $child_node,
                       type => 'ps element missing',
                       text => $missing_ln,
                       level => $self->{level}->{must});
  }

  $element_state->{phase} = 'in-rb';
},
## End-of-element callback of the |ruby| checker.  Reports what is
## still missing according to the phase the state machine stopped in,
## then propagates the "has significant content" flag to the parent.
##
## Arguments: the checker object, the item being closed (its |node|
## and |real_parent_state| members are used) and the element state.
## Dies if the phase is not one of the phases known to this checker.
check_end => sub {
  my ($self, $item, $element_state) = @_;
  $self->_remove_minus_elements ($element_state);

  my $phase = $element_state->{phase};
  if ($phase eq 'before-rb') {
    ## Nothing at all was found in the element.
    $self->{onerror}->(node => $item->{node},
                       level => $self->{level}->{should},
                       type => 'no significant content');
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($phase eq 'in-rb') {
    ## A ruby base was started but never followed by an |rt|.
    unless ($element_state->{has_significant}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content at the end');
    }
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($phase eq 'after-rt' or $phase eq 'after-rp2') {
    ## The last group is complete; nothing to report.
  } elsif ($phase eq 'after-rp1') {
    ## Only the first |rp| of a group was seen.
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } elsif ($phase eq 'after-rp-rt') {
    ## The closing |rp| of the group is missing.
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } else {
    ## The message names this callback (it used to say
    ## |check_child_text|, which pointed at the wrong sub).
    die "check_end: Bad |ruby| phase: $phase";
  }

  ## NOTE: A modified version of |check_end| of %AnyChecker.
  ## |has_sig| remembers significant content found in earlier ruby
  ## bases, whose |has_significant| flag was moved aside by
  ## |check_child_element|.
  if ($element_state->{has_significant} or $element_state->{has_sig}) {
    $item->{real_parent_state}->{has_significant} = 1;
  }
},
3961     };
3962    
## Checker definition for the HTML |rt| element.  All callbacks come
## from %HTMLPhrasingContentChecker; only the feature status and the
## attribute checker are specific to this element.
$Element->{$HTML_NS}{rt} = {
  %HTMLPhrasingContentChecker,
  ## No element-specific attribute checkers (first argument is
  ## empty); the second argument is the attribute status table.
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD,
  }),
  status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
}; # rt
3972    
## Checker definition for the HTML |rp| element.  All callbacks come
## from %HTMLPhrasingContentChecker; only the feature status and the
## attribute checker are specific to this element.
$Element->{$HTML_NS}{rp} = {
  %HTMLPhrasingContentChecker,
  ## No element-specific attribute checkers (first argument is
  ## empty); the second argument is the attribute status table.
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD,
  }),
  status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
}; # rp
3982 wakaba 1.99
3983 wakaba 1.29 =pod
3984    
3985     ## TODO:
3986    
3987     +
3988     + <p>Partly because of the confusion described above, authors are
3989     + strongly recommended to always mark up all paragraphs with the
3990     + <code>p</code> element, and to not have any <code>ins</code> or
3991     + <code>del</code> elements that cross across any <span
3992     + title="paragraph">implied paragraphs</span>.</p>
3993     +
3994     (An informative note)
3995    
3996     <p><code>ins</code> elements should not cross <span
3997     + title="paragraph">implied paragraph</span> boundaries.</p>
3998     (normative)
3999    
4000     + <p><code>del</code> elements should not cross <span
4001     + title="paragraph">implied paragraph</span> boundaries.</p>
4002     (normative)
4003    
4004     =cut
4005    
## Checker definition for the HTML |ins| element.  Content checking
## is inherited from %HTMLTransparentChecker; this definition adds
## the |cite| and |datetime| attribute checkers and registers how
## URL-valued attributes of the element are classified.
$Element->{$HTML_NS}{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => syntax checker.
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    ## Attribute name => feature status.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |cite| is a citation link; |template| and |ref| are marked as
    ## plain resource references.
    $element_state->{uri_info}{cite}{type}{cite} = 1;
    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw/template ref/;
  },
}; # ins
4027    
## Checker definition for the HTML |del| element.  Content checking
## is inherited from %HTMLTransparentChecker, except that |check_end|
## is replaced so that significant content found inside |del| is not
## reported to the parent element.
$Element->{$HTML_NS}{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => syntax checker.
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    ## Attribute name => feature status.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |cite| is a citation link; |template| and |ref| are marked as
    ## plain resource references.
    $element_state->{uri_info}{cite}{type}{cite} = 1;
    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw/template ref/;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate.
    return if $element_state->{has_significant};

    ## An item flagged as transparent is not reported here.
    return if $item->{transparent};

    $self->{onerror}->(node => $item->{node},
                       level => $self->{level}->{should},
                       type => 'no significant content');
  },
}; # del
4061    
## Checker definition for the HTML |figure| element.
##
## Content model handled here: |legend, Flow| or |Flow, legend?|.
## The element state tracks
##   has_legend_at_first - a |legend| was seen before any other content
##   has_legend          - the most recent |legend| seen after other
##                         content (the element node itself)
##   has_non_legend      - non-|legend| content was seen since the last
##                         |legend|
$Element->{$HTML_NS}{figure} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Explicitly forbidden (minus) interactive content.
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
      return;
    }

    ## Explicitly allowed (plus) elements are not checked further.
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## Anything but |legend| is checked as ordinary flow content.
    unless ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
      return;
    }

    ## From here on the child is a |legend|.
    if ($element_state->{has_legend_at_first}) {
      ## A leading |legend| already exists; this one is extra.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    } elsif ($element_state->{has_legend}) {
      ## A second trailing |legend|: the earlier one is reported and
      ## this one becomes the candidate for the trailing position.
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
      $element_state->{has_legend} = $child_el;
    } elsif ($element_state->{has_non_legend}) {
      $element_state->{has_legend} = $child_el;
    } else {
      $element_state->{has_legend_at_first} = 1;
    }
    delete $element_state->{has_non_legend};
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;

    $element_state->{in_figure} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A |legend| that came after other content must be the last
    ## thing in the element; content following it is an error.
    if (not $element_state->{has_legend_at_first} and
        $element_state->{has_legend} and
        $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
}; # figure
4123 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
4124 wakaba 1.1
## Attribute checker used for attributes whose value syntax is not
## checked by this module: it always reports the attribute as an
## 'unknown attribute' at the "uncertain" level.
##
## Arguments: the checker object (its |onerror| callback and |level|
## table are used) and the attribute node.
my $AttrCheckerNotImplemented = sub {
  my ($checker, $attr_node) = @_;
  my $report = $checker->{onerror};
  $report->(node => $attr_node,
            type => 'unknown attribute',
            level => $checker->{level}->{uncertain});
};
4131    
## Checker definition for the HTML |img| element.  Content checking
## is inherited from %HTMLEmptyChecker.  |check_attrs| validates the
## attributes, reports a missing |alt| (should-level) or |src|
## (must-level), and registers how the URL-valued attributes are
## classified.
$Element->{$HTML_NS}{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_attrs = $GetHTMLAttrsChecker->({
      ## Attribute name => syntax checker.
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      alt => sub { }, ## NOTE: No syntactical requirement
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        ## |ismap| is reported unless the |in_a_href| flag is set.
        unless ($self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{level}->{must});
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      longdesc => $HTMLURIAttrChecker,
      ## TODO: HTML4 |name|
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    }, {
      ## Attribute name => feature status.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      src => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    });
    $check_attrs->($self, $item, $element_state);

    ## Attributes whose absence is reported: [name, error level key].
    ## TODO: ... (the |alt| requirement is not fully implemented)
    for my $required (['alt', 'should'], ['src', 'must']) {
      my ($attr_name, $level_key) = @$required;
      next if $item->{node}->has_attribute_ns (undef, $attr_name);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $attr_name,
                         level => $self->{level}->{$level_key});
    }

    ## TODO: external resource check

    $element_state->{uri_info}{$_}{type}{embedded} = 1
        for qw/src lowsrc dynsrc/;
    $element_state->{uri_info}{longdesc}{type}{cite} = 1;
  },
}; # img
4200    
## Checker definition for the HTML |iframe| element.  Content
## checking is inherited from %HTMLTextChecker; this definition adds
## the element-specific attribute checkers and registers how the
## URL-valued attributes are classified.
##
## The boolean attribute defined by HTML5 is spelled |seamless|.  It
## used to be registered here under the misspelling |seemless|, so the
## real attribute was not registered in either table while the
## misspelled one was accepted.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    ## Attribute name => syntax checker.
    height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    name => $HTMLBrowsingContextNameAttrChecker,
    sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
      'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
    }),
    seamless => $GetHTMLBooleanAttrChecker->('seamless'),
    src => $HTMLURIAttrChecker,
    width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
  }, {
    ## Attribute name => feature status.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sandbox => FEATURE_HTML5_WD,
    scrolling => FEATURE_M12N10_REC,
    seamless => FEATURE_HTML5_WD,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |src| points to embedded content; |template| and |ref| are
    ## marked as plain resource references.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
}; # iframe
4242    
## Checker definition for the HTML |embed| element.  Content checking
## is inherited from %HTMLEmptyChecker.
##
## |check_attrs| is hand-written (it does not use
## $GetHTMLAttrsChecker): besides |src|, |type|, |width|, |height|
## and |data-*|, any no-namespace attribute whose name is a lowercase
## NCName not starting with "xml" is accepted.
$Element->{$HTML_NS}{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $src_seen;
    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;

      ## Status of the attributes known for this element, looked up
      ## by local name.
      my %known_status = (
        %HTMLAttrStatus,
        height => FEATURE_HTML5_LC,
        src => FEATURE_HTML5_WD,
        type => FEATURE_HTML5_WD,
        width => FEATURE_HTML5_LC,
      );
      my $status = $known_status{$ln};

      my $checker;
      if ($ns eq '') {
        if ($ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $src_seen = 1;
        } elsif ($ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } elsif ($ln eq 'width' or $ln eq 'height') {
          ## TODO: because spec does not define them yet.
          $checker = $AttrCheckerNotImplemented;
        } elsif ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } elsif ($ln !~ /^[Xx][Mm][Ll]/ and
                 $ln !~ /[A-Z]/ and
                 $ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
          ## NOTE: Any local attribute is ok.
          $checker = $HTMLAttrChecker->{$ln} || sub { };
          $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
        } else {
          $checker = $HTMLAttrChecker->{$ln};
        }
      }

      ## Fall back to the generic per-namespace tables; the empty
      ## local name is the per-namespace default entry.
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif (length $ns or $status) {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for global attributes in the spec
      }
      ## A no-namespace attribute with neither checker nor status is
      ## not reported here.

      $self->_attr_status_info ($attr, $status);
    }

    unless ($src_seen) {
      ## NOTE: <embed> without src="" is allowed since revision 1929.
      ## An informational message is issued since <embed> w/o src=""
      ## is likely an authoring error.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{info});
    }

    ## TODO: external resource check

    $element_state->{uri_info}{src}{type}{embedded} = 1;
  },
}; # embed
4320    
4321 wakaba 1.49 ## TODO:
4322     ## {applet} FEATURE_M12N10_REC_DEPRECATED
4323     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4324    
## Checker definition for the HTML |object| element.
##
## Content model handled here: |param*| followed by transparent
## (flow) content.  The element state flag |has_non_param| records
## that something other than |param| has been seen, after which a
## |param| child is an error.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      ## Attribute name => syntax checker.
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      archive => $HTMLSpaceURIsAttrChecker,
          ## TODO: Relative to @codebase
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      classid => $HTMLURIAttrChecker,
      codebase => $HTMLURIAttrChecker,
      codetype => $HTMLIMTAttrChecker,
          ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
      data => $HTMLURIAttrChecker,
      declare => $GetHTMLBooleanAttrChecker->('declare'),
          ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
          ## [HTML4] but we don't know how to test this.
      form => $HTMLFormAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name => $HTMLBrowsingContextNameAttrChecker,
          ## NOTE: |name| attribute of the |object| element defines
          ## the name of the browsing context created by the element,
          ## if any, but is also used as the form control name of the
          ## form control provided by the plugin, if any.
      standby => sub {}, ## NOTE: %Text; in HTML4
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    }, {
      ## Attribute name => feature status.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      'content-length' => FEATURE_XHTML2_ED,
      data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      form => FEATURE_HTML5_DEFAULT,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## At least one of |data| and |type| has to be specified.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type',
                           level => $self->{level}->{must});
      }
    }

    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  ## NOTE: param*, transparent (Flow)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      ## The rejected child still counts as non-|param| content, so
      ## that a later |param| is reported.  (This used to set
      ## |has_non_legend|, a flag that is not read anywhere in this
      ## checker.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed (plus) elements are not checked further.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| is only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text ends the leading run of |param| elements.
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      ## No significant content and no parent element.
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
}; # object
4442 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4443     ## What about |<section><object data><style scoped></style>x</object></section>|?
4444     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4445 wakaba 1.1
## Checker definition for the HTML |param| element.  Content checking
## is inherited from %HTMLEmptyChecker.  |check_attrs| validates the
## attributes and then reports a must-level error for each of |name|
## and |value| that is not specified.
$Element->{$HTML_NS}{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_attrs = $GetHTMLAttrsChecker->({
      ## Attribute name => syntax checker; |name| and |value| accept
      ## any value.
      name => sub { },
      type => $HTMLIMTAttrChecker,
      value => sub { },
      valuetype => $GetHTMLEnumeratedAttrChecker->({
        data => 1, ref => 1, object => 1,
      }),
    }, {
      ## Attribute name => feature status.
      %HTMLAttrStatus,
      href => FEATURE_RDFA_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    });
    $check_attrs->(@_);

    ## Both attributes are required, checked in this order.
    for my $required_attr (qw/name value/) {
      next if $item->{node}->has_attribute_ns (undef, $required_attr);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $required_attr,
                         level => $self->{level}->{must});
    }
  },
}; # param
4481    
## Checker definition for the HTML |video| element.
##
## Element state keys used here:
##   allow_source - true while a |source| child is still acceptable
##                  (no |src| attribute and nothing but |source|
##                  elements seen so far).
##   has_source   - |1| once a |source| child has been seen, |-1| if one
##                  is required but has not been seen yet, false
##                  otherwise.
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    poster => $HTMLURIAttrChecker,
    height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
  }, {
    %HTMLAttrStatus,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    end => FEATURE_HTML5_AT_RISK,
    height => FEATURE_HTML5_LC,
    loopend => FEATURE_HTML5_AT_RISK,
    loopstart => FEATURE_HTML5_AT_RISK,
    playcount => FEATURE_HTML5_AT_RISK,
    poster => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    start => FEATURE_HTML5_AT_RISK,
    width => FEATURE_HTML5_LC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |source| children are acceptable only when there is no |src|
    ## attribute.
    $element_state->{allow_source}
        = !$item->{node}->has_attribute_ns (undef, 'src');

    ## |-1| marks "a |source| child is required but not seen yet".
    ## NOTE: |has_source| might already be set true by |check_element|,
    ## in which case it is left untouched.
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{src}->{type}->{embedded} = 1;
    $uri_info->{poster}->{type}->{embedded} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded element.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included element: nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
      ## A |source| element is an error once it is no longer allowed,
      ## but it still counts as "seen".
      if (not $element_state->{allow_source}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
      $element_state->{has_source} = 1;
    } else {
      ## Any other child ends the run of |source| elements and is
      ## checked as flow content.
      delete $element_state->{allow_source};
      $HTMLFlowContentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text ends the run of |source| elements.
    delete $element_state->{allow_source} if $has_significant;
    $HTMLFlowContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Still |-1|: a |source| child was required but never appeared.
    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'source',
                         level => $self->{level}->{must});
    }

    ## The remaining end-of-element checks are those of |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
4563    
## Checker definition for the HTML |audio| element.  Everything is
## inherited from the |video| definition except the status and the
## attribute checker (no |poster|, |height| or |width| entries).
$Element->{$HTML_NS}->{audio} = {
  %{$Element->{$HTML_NS}->{video}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      src => $HTMLURIAttrChecker,
      ## TODO: start, loopstart, loopend, end
      ## ISSUE: they MUST be "value time offset"s.  Value?
      ## ISSUE: playcount has no conformance criteria
      autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
      controls => $GetHTMLBooleanAttrChecker->('controls'),
    },
    {
      %HTMLAttrStatus,
      autoplay => FEATURE_HTML5_LC,
      controls => FEATURE_HTML5_LC,
      end => FEATURE_HTML5_AT_RISK,
      loopend => FEATURE_HTML5_AT_RISK,
      loopstart => FEATURE_HTML5_AT_RISK,
      playcount => FEATURE_HTML5_AT_RISK,
      src => FEATURE_HTML5_LC,
      start => FEATURE_HTML5_AT_RISK,
    },
  ),
};
4586    
## Checker definition for the HTML |source| element.  The |src|
## attribute is required.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $checkers = {
      media => $HTMLMQAttrChecker,
      ## ISSUE: Negative or zero pixelratio=""
      pixelratio => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
    };
    my $statuses = {
      %HTMLAttrStatus,
      media => FEATURE_HTML5_LC,
      pixelratio => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_LC,
      type => FEATURE_HTML5_LC,
    };
    $GetHTMLAttrsChecker->($checkers, $statuses)->(@_);

    if (not $item->{node}->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{must});
    }

    ## The |src| URL is recorded as an embedded resource.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4614    
## Checker definition for the HTML |canvas| element.  |height| and
## |width| are non-negative integers with no further range restriction.
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_COMPLETE,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    },
    {
      %HTMLAttrStatus,
      height => FEATURE_HTML5_COMPLETE,
      width => FEATURE_HTML5_COMPLETE,
    },
  ),
};
4627    
## Checker definition for the HTML |map| element.
##
## The |name| attribute is required and must not be empty; if an |id|
## attribute is also present, its value has to equal the |name| value.
$Element->{$HTML_NS}->{map} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Set to |[$value, $attr_node]| by the |name| checker when the
    ## attribute is present.
    my $has_name;

    my $checkers = {
      name => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        ## NOTE: Duplication is not non-conforming.
        ## NOTE: Space characters are not non-conforming.
        unless (length $value) {
          $self->{onerror}->(node => $attr,
                             type => 'empty attribute value',
                             level => $self->{level}->{must});
        }
        ## Register the first |map| seen for each name.
        $self->{map}->{$value} ||= $attr;
        $has_name = [$value, $attr];
      },
    };
    my $statuses = {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    };
    $GetHTMLAttrsChecker->($checkers, $statuses)->(@_);

    if (not $has_name) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'name',
                         level => $self->{level}->{must});
    } else {
      my $id = $item->{node}->get_attribute_ns (undef, 'id');
      if (defined $id and $has_name->[0] ne $id) {
        ## The error is reported on the |id| attribute node.
        $self->{onerror}
            ->(node => $item->{node}->get_attribute_node_ns (undef, 'id'),
               type => 'id ne name',
               level => $self->{level}->{must});
      }
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Remember the enclosing stack so that |check_end| can restore it,
    ## then push a fresh hash for this |map| element.
    $element_state->{in_map_original} = $self->{flag}->{in_map};
    $self->{flag}->{in_map} = [@{$self->{flag}->{in_map} or []}, {}];
    ## NOTE: |{in_map}| is an array reference holding one hash
    ## reference per currently open |map| element.  Each hash is
    ## keyed by the resource of the |area| elements processed so far
    ## inside that |map| (## TODO: use absolute URL).  The value is
    ## zero (|0|) once an |area| element with a non-empty |alt|
    ## attribute has been seen for that resource.  Otherwise, when
    ## only |area| elements with an empty |alt=""| have been seen,
    ## the value is an array reference listing all of those |area|
    ## elements.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Report every empty |alt=""| whose resource never received a
    ## non-empty |alt| within this |map|.
    my $seen = $self->{flag}->{in_map}->[-1];
    for my $url (keys %$seen) {
      my $nodes = $seen->{$url};
      next unless $nodes;
      for my $node (@$nodes) {
        $self->{onerror}->(type => 'empty area alt',
                           node => $node,
                           level => $self->{level}->{html5_no_may});
      }
    }

    ## Leave this |map|: restore the enclosing stack.
    $self->{flag}->{in_map} = $element_state->{in_map_original};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
4726    
## Checker definition for the HTML |area| element.
##
## |check_attrs| runs in three steps:
##   1. every attribute is checked, using the |area|-specific checkers
##      where they exist and the generic ones otherwise;
##   2. the relationship between |href|, |alt| and the link-related
##      attributes is checked;
##   3. the |coords| values are checked against the |shape| in effect.
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Attribute nodes that have an |area|-specific checker, by name.
    my %found;
    ## Array reference of |coords| numbers; left undefined when the
    ## attribute is absent or syntactically broken.
    my $coords;

    for my $attr_node (@{$item->{node}->attributes}) {
      my $ns = $attr_node->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr_node->manakai_local_name;
      my $checker;
      my $status;
      if ($ns eq '') {
        $status = {
          %HTMLAttrStatus,
          %HTMLM12NCommonAttrStatus,
          accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
          alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
          hreflang => FEATURE_HTML5_WD,
          lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
          media => FEATURE_HTML5_WD,
          nohref => FEATURE_M12N10_REC,
          onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          ping => FEATURE_HTML5_WD,
          rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
          shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
          type => FEATURE_HTML5_WD,
        }->{$ln};

        $checker = {
          alt => sub {
            ## NOTE: Checked later.
          },
          shape => $GetHTMLEnumeratedAttrChecker->({
            circ => -1, circle => 1,
            default => 1,
            poly => 1, polygon => -1,
            rect => 1, rectangle => -1,
          }),
          coords => sub {
            my ($self, $attr) = @_;
            my $value = $attr->value;
            ## A comma-separated list of (possibly negative) integers.
            if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
              $coords = [split /,/, $value];
            } else {
              $self->{onerror}->(node => $attr,
                                 type => 'coords:syntax error',
                                 level => $self->{level}->{must});
            }
          },
          nohref => $GetHTMLBooleanAttrChecker->('nohref'),
          target => $HTMLTargetAttrChecker,
          href => $HTMLURIAttrChecker,
          ping => $HTMLSpaceURIsAttrChecker,
          rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
          media => $HTMLMQAttrChecker,
          hreflang => $HTMLLanguageTagAttrChecker,
          type => $HTMLIMTAttrChecker,
        }->{$ln};

        if ($checker) {
          ## Remembered for the cross-attribute checks below.
          $found{$ln} = $attr_node;
        } elsif ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$ln};
        }
      }

      ## Fall back to the namespace-wide tables.
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr_node, $item, $element_state)
            if ref $checker;
      } elsif ($ns ne '' or $status) {
        $self->{onerror}->(node => $attr_node,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }
      ## (A null-namespace attribute with neither checker nor status is
      ## not reported as unknown here.)

      $self->_attr_status_info ($attr_node, $status);
    }

    if (not defined $found{href}) {
      ## Without |href|, none of these attributes may be specified.
      for my $name (qw/target ping rel media hreflang type alt/) {
        next unless defined $found{$name};
        $self->{onerror}->(node => $found{$name},
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{has_hyperlink_element} = 1;
      if (not defined $found{alt}) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => 'alt',
                           level => $self->{level}->{must});
      } else {
        my $url = $found{href}->value; ## TODO: resolve
        if (length $found{alt}->value) {
          ## A non-empty |alt| settles this resource for every open
          ## |map| element.
          for my $map_state (@{$self->{flag}->{in_map} or []}) {
            $map_state->{$url} = 0;
          }
        } else {
          ## NOTE: Empty |alt=""|.  If there is another |area| element
          ## with the same |href=""| and that |area| element's
          ## |alt=""| attribute is not an empty string, then this
          ## is conforming.
          for my $map_state (@{$self->{flag}->{in_map} or []}) {
            ## Already settled by a non-empty |alt|.
            next if exists $map_state->{$url} and not $map_state->{$url};
            push @{$map_state->{$url} ||= []}, $found{alt};
          }
        }
      }
    }

    ## Normalize the |shape| keyword; anything unrecognized (or no
    ## |shape| at all) is treated as a rectangle.
    my $shape = 'rectangle';
    if (defined $found{shape}) {
      $shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      }->{lc $found{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    if ($shape eq 'default') {
      ## The default shape takes no coordinates.
      if (defined $found{coords}) {
        $self->{onerror}->(node => $found{coords},
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    } elsif (not defined $found{coords}) {
      ## Every other shape requires |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'coords',
                         level => $self->{level}->{must});
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      ## Exactly three numbers; the third (radius) must not be negative.
      if (@$coords != 3) {
        $self->{onerror}->(node => $found{coords},
                           type => 'coords:number not 3',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      } elsif ($coords->[2] < 0) {
        $self->{onerror}->(node => $found{coords},
                           type => 'coords:out of range',
                           index => 2,
                           value => $coords->[2],
                           level => $self->{level}->{must});
      }
    } elsif ($shape eq 'polygon') {
      ## At least six numbers, and an even count.
      if (@$coords < 6) {
        $self->{onerror}->(node => $found{coords},
                           type => 'coords:number lt 6',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      } elsif (@$coords % 2 != 0) {
        $self->{onerror}->(node => $found{coords},
                           type => 'coords:number not even',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      }
    } elsif ($shape eq 'rectangle') {
      ## Exactly four numbers; the first must be less than the third
      ## and the second less than the fourth.
      if (@$coords != 4) {
        $self->{onerror}->(node => $found{coords},
                           type => 'coords:number not 4',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      } else {
        for my $index (0, 1) {
          next if $coords->[$index] < $coords->[$index + 2];
          $self->{onerror}->(node => $found{coords},
                             type => 'coords:out of range',
                             index => $index,
                             value => $coords->[$index],
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## An |area| element that has a parent element must be inside a
    ## |map| element.
    if (not $self->{flag}->{in_map} and
        $item->{node}->manakai_parent_element) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element not allowed:area',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
4974    
## Phase transitions for the children of a |table| element.
##
## The content model is, in order: an optional |caption|, zero or more
## |colgroup| elements, an optional |thead|, an optional |tfoot|, then
## either zero or more |tbody| elements or one or more |tr| elements,
## and finally an optional |tfoot| (at most one |tfoot| in total).
##
## The outer key is the current phase, the inner key is the local name
## of an HTML child element, and the value is the phase entered after
## that child.  A child with no entry is not allowed in that phase.
##
## NOTE: A |tfoot| placed before the body enters 'after leading tfoot',
## which accepts both |tbody| and |tr|.  (It used to enter 'in tbodys',
## which wrongly rejected |tr| elements following an early |tfoot|.)
my %HTMLTablePhaseTransition = (
  'before caption' => {
    caption => 'in colgroup',
    colgroup => 'in colgroup',
    thead => 'after thead',
    tfoot => 'after leading tfoot',
    tbody => 'in tbodys',
    tr => 'in trs',
  },
  'in colgroup' => {
    colgroup => 'in colgroup',
    thead => 'after thead',
    tfoot => 'after leading tfoot',
    tbody => 'in tbodys',
    tr => 'in trs',
  },
  'after thead' => {
    tfoot => 'after leading tfoot',
    tbody => 'in tbodys',
    tr => 'in trs',
  },
  'after leading tfoot' => {
    tbody => 'in tbodys',
    tr => 'in trs',
  },
  'in tbodys' => {
    tbody => 'in tbodys',
    tfoot => 'after tfoot',
  },
  'in trs' => {
    tr => 'in trs',
    tfoot => 'after tfoot',
  },
  'after tfoot' => {},
);

## Checker definition for the HTML |table| element.
##
## Element state keys used here:
##   phase     - current position in the content model (a key of
##               |%HTMLTablePhaseTransition|).
##   has_tfoot - true once a |tfoot| child has been accepted.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cellpadding => $HTMLLengthAttrChecker,
    cellspacing => $HTMLLengthAttrChecker,
    frame => $GetHTMLEnumeratedAttrChecker->({
      void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
      lhs => 1, rhs => 1, box => 1, border => 1,
    }),
    rules => $GetHTMLEnumeratedAttrChecker->({
      none => 1, groups => 1, rows => 1, cols => 1, all => 1,
    }),
    summary => sub {}, ## NOTE: %Text; in HTML4.
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    cols => FEATURE_RFC1942,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';

    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included element: nothing to check.
    } else {
      my $transition = $HTMLTablePhaseTransition{$element_state->{phase}}
          or die "check_child_element: Bad |table| phase: $element_state->{phase}";

      ## Only HTML elements can take part in the table content model.
      my $next_phase = $child_nsuri eq $HTML_NS
          ? $transition->{$child_ln} : undef;

      ## There can be only one |tfoot| child in total.
      my $is_tfoot = defined $next_phase && $child_ln eq 'tfoot';
      undef $next_phase if $is_tfoot and $element_state->{has_tfoot};

      if (defined $next_phase) {
        $element_state->{phase} = $next_phase;
        $element_state->{has_tfoot} = 1 if $is_tfoot;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text is never allowed directly inside |table|.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    my $table = Whatpm::HTMLTable->form_table ($item->{node}, sub {
      $self->{onerror}->(@_);
    }, $self->{level});
    Whatpm::HTMLTable->assign_header
        ($table, $self->{onerror}, $self->{level});
    ## The formed table is handed back to the caller with the results.
    push @{$self->{return}->{table}}, $table;

    $HTMLChecker{check_end}->(@_);
  },
};
5124    
## Checker definition for the HTML |caption| element: flow content in
## which |table| elements are excluded for the duration of the element.
$Element->{$HTML_NS}->{caption} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        top => 1, bottom => 1, left => 1, right => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    ## Exclude |table| while inside the caption.
    my $excluded = {$HTML_NS => {table => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Undo the exclusion added by |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # caption
5151 wakaba 1.1
## Attribute checkers for the cell alignment attributes, shared by
## several table-related element definitions.
my %cellalign = (
  ## HTML4 %cellhalign;
  align => $GetHTMLEnumeratedAttrChecker->({
    left => 1, center => 1, right => 1, justify => 1, char => 1,
  }),
  char => sub {
    my ($self, $attr) = @_;

    ## NOTE: "character" or |%Character;| in HTML4.

    ## The value has to be exactly one character long.
    unless (length ($attr->value) == 1) {
      $self->{onerror}->(node => $attr, type => 'char:syntax error',
                         level => $self->{level}->{html4_fact});
    }
  },
  charoff => $HTMLLengthAttrChecker,

  ## HTML4 %cellvalign;
  valign => $GetHTMLEnumeratedAttrChecker->({
    top => 1, middle => 1, bottom => 1, baseline => 1,
  }),
);
5175    
## Checker definition for the HTML |colgroup| element.  Only |col|
## child elements are accepted, and significant text is not allowed.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    ## |span| has to be greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    valign => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included element: nothing to check.
    } elsif (not ($child_nsuri eq $HTML_NS and $child_ln eq 'col')) {
      ## Anything but an HTML |col| element is an error.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
};
5222    
## |col| element: uses the empty-element checker; only its attributes
## need element-specific handling.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    ## |span| has to be an integer greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    span    => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    valign  => FEATURE_M12N10_REC,
    width   => FEATURE_M12N10_REC,
  }),
};
5241    
## |tbody| element: the only child element accepted here is |tr|
## (besides anything registered in |plus_elements|); significant text
## is rejected.  This definition is copied for |thead| and |tfoot|.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    valign  => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Listed in |minus_elements| and classified as interactive content.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not ($self->{plus_elements}->{$ns}->{$ln} or
                  ($ns eq $HTML_NS and $ln eq 'tr'))) {
      ## Neither whitelisted through |plus_elements| nor an HTML |tr|.
      $self->{onerror}->(node => $child, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
5281    
## |thead| gets its own shallow copy of the |tbody| definition.
$Element->{$HTML_NS}->{thead} = { %{ $Element->{$HTML_NS}->{tbody} } };
5285    
## |tfoot| gets its own shallow copy of the |tbody| definition.
$Element->{$HTML_NS}->{tfoot} = { %{ $Element->{$HTML_NS}->{tbody} } };
5289    
## |tr| element: the only child elements accepted here are |td| and
## |th| (besides anything registered in |plus_elements|); significant
## text is rejected.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    bgcolor => $HTMLColorAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    valign  => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Listed in |minus_elements| and classified as interactive content.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not ($self->{plus_elements}->{$ns}->{$ln} or
                  ($ns eq $HTML_NS and
                   ($ln eq 'td' or $ln eq 'th')))) {
      ## Neither whitelisted through |plus_elements| nor an HTML cell.
      $self->{onerror}->(node => $child, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
5332    
## |td| element: content is checked by the flow content checker; only
## the attribute tables are element-specific.
$Element->{$HTML_NS}->{td} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr    => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis    => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    headers => sub {
      ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
      ## Though that method does not check the |headers| attribute of a
      ## |td| element if the element does not form a table, in that case
      ## the |td| element is non-conforming anyway.
    },
    nowrap  => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope   => $GetHTMLEnumeratedAttrChecker->({
      row => 1, col => 1, rowgroup => 1, colgroup => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    abbr    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    align   => FEATURE_M12N10_REC,
    axis    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    headers => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    height  => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    nowrap  => FEATURE_M12N10_REC_DEPRECATED,
    rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    scope   => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    valign  => FEATURE_M12N10_REC,
    width   => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
5372    
## |th| element: content is checked by the phrasing content checker;
## only the attribute tables are element-specific.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr    => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis    => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    ## TODO: HTML4(?) |headers|
    nowrap  => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope   => $GetHTMLEnumeratedAttrChecker->({
      row => 1, col => 1, rowgroup => 1, colgroup => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    abbr    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    align   => FEATURE_M12N10_REC,
    axis    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    headers => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    height  => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    nowrap  => FEATURE_M12N10_REC_DEPRECATED,
    rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    scope   => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    valign  => FEATURE_M12N10_REC,
    width   => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
5407    
## |form| element: flow content, with |form| itself added to the minus
## elements for the duration of the element (see |check_start| and
## |check_end|).
$Element->{$HTML_NS}->{form} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $AcceptAttrChecker,
    'accept-charset' => $HTMLCharsetsAttrChecker,
    action => $HTMLURIAttrChecker, ## TODO: Warn if submission is not defined for the scheme
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    enctype => $GetHTMLEnumeratedAttrChecker->({
      'application/x-www-form-urlencoded' => 1,
      'multipart/form-data' => 1,
      'text/plain' => 1,
    }),
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    name => sub {
      ## The name must be non-empty and must not repeat a name already
      ## recorded in |$self->{form}|.
      my ($self, $attr) = @_;

      my $form_name = $attr->value;
      if ($form_name eq '') {
        $self->{onerror}->(type => 'empty form name',
                           node => $attr,
                           level => $self->{level}->{must});
      } elsif ($self->{form}->{$form_name}) {
        $self->{onerror}->(type => 'duplicate form name',
                           node => $attr,
                           value => $form_name,
                           level => $self->{level}->{must});
      } else {
        ## First occurrence: remember it for later duplicate detection.
        $self->{form}->{$form_name} = 1;
      }
    },
    novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
    ## TODO: Tests for following attrs:
    onformchange => $HTMLEventHandlerAttrChecker,
    onforminput => $HTMLEventHandlerAttrChecker,
    onreceived => $HTMLEventHandlerAttrChecker,
    replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
    target => $HTMLTargetAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
    'accept-charset' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    data => FEATURE_WF2,
    enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    novalidate => FEATURE_HTML5_DEFAULT,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput => FEATURE_WF2_INFORMATIVE,
    onreceived => FEATURE_WF2,
    onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    replace => FEATURE_WF2,
    sdapref => FEATURE_HTML20_RFC,
    sdasuff => FEATURE_HTML20_RFC,
    target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register |form| as a minus element while this element is open.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {form => 1}});

    ## Record the URI type associated with each URI-valued attribute.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{action}->{type}->{action} = 1;
    $uri_info->{data}->{type}->{resource} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;

    $element_state->{id_type} = 'form';
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the registration made in |check_start|, then run the
    ## generic flow content end-of-element check.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
5490    
## |fieldset| element: a |legend| child followed by flow content.  The
## element state flags |has_legend| and |has_non_legend| track whether
## a |legend| has been seen and whether anything has preceded it.
$Element->{$HTML_NS}->{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    name => FEATURE_HTML5_DEFAULT,
  }),
  ## NOTE: legend, Flow
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Listed in |minus_elements| and classified as interactive content.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Whitelisted through |plus_elements|: nothing to check or record.
    } elsif ($ns eq $HTML_NS and $ln eq 'legend') {
      ## A |legend| is an error once anything else (including another
      ## |legend|) has already been seen.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:details legend',
                         level => $self->{level}->{must})
          if $element_state->{has_non_legend};
      $element_state->{has_legend} = 1;
      $element_state->{has_non_legend} = 1;
    } else {
      ## Anything else is delegated to the flow content checker, which
      ## receives the original argument list.
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $is_transparent;
      ## TODO:
      ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
      ## should be an error, since |object| is allowed as flow,
      ## therefore |details| part of the content model does not match.
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $element_state) = @_;
    ## Significant text counts as non-|legend| content.
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A |legend| child is mandatory.
    if (not $element_state->{has_legend}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'legend',
                         level => $self->{level}->{must});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
  ## NOTE: This definition is partially reused by |details| element's
  ## checker.
};
5557    
5558     $Element->{$HTML_NS}->{input} = {
5559 wakaba 1.119 %HTMLEmptyChecker,
5560     status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5561 wakaba 1.140 check_attrs => sub {
5562     my ($self, $item, $element_state) = @_;
5563 wakaba 1.142
5564 wakaba 1.145 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5565 wakaba 1.142 $state = 'text' unless defined $state;
5566     $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5567    
5568 wakaba 1.140 for my $attr (@{$item->{node}->attributes}) {
5569     my $attr_ns = $attr->namespace_uri;
5570     $attr_ns = '' unless defined $attr_ns;
5571     my $attr_ln = $attr->manakai_local_name;
5572     my $checker;
5573     my $status;
5574     if ($attr_ns eq '') {
5575     $status =
5576     {
5577     %HTMLAttrStatus,
5578     %HTMLM12NCommonAttrStatus,
5579     accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5580     'accept-charset' => FEATURE_HTML2X_RFC,
5581 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
5582 wakaba 1.140 action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5583     align => FEATURE_M12N10_REC_DEPRECATED,
5584     alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5585     autocomplete => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5586     autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5587     checked => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5588     datafld => FEATURE_HTML4_REC_RESERVED,
5589     dataformatas => FEATURE_HTML4_REC_RESERVED,
5590     datasrc => FEATURE_HTML4_REC_RESERVED,
5591     disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5592     enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5593     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5594 wakaba 1.150 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5595     FEATURE_XHTMLBASIC11_CR,
5596 wakaba 1.140 ismap => FEATURE_M12N10_REC,
5597 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5598 wakaba 1.140 list => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5599     max => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5600 wakaba 1.150 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X |
5601     FEATURE_M12N10_REC,
5602 wakaba 1.140 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5603     min => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5604 wakaba 1.156 multiple => FEATURE_HTML5_DEFAULT,
5605 wakaba 1.140 name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5606 wakaba 1.161 novalidate => FEATURE_HTML5_DEFAULT,
5607 wakaba 1.140 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5608     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5609     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5610     onformchange => FEATURE_WF2_INFORMATIVE,
5611     onforminput => FEATURE_WF2_INFORMATIVE,
5612     oninput => FEATURE_WF2,
5613     oninvalid => FEATURE_WF2,
5614     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5615     pattern => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5616 wakaba 1.156 placeholder => FEATURE_HTML5_DEFAULT,
5617 wakaba 1.140 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5618     replace => FEATURE_WF2,
5619     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5620     sdapref => FEATURE_HTML20_RFC,
5621 wakaba 1.154 size => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5622 wakaba 1.140 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5623     step => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5624     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5625     target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5626 wakaba 1.161 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO:dropped
5627 wakaba 1.140 type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5628     usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
5629     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5630     }->{$attr_ln};
5631    
5632     $checker =
5633     {
5634 wakaba 1.141 ## NOTE: Value of an empty string means that the attribute is only
5635     ## applicable for a specific set of states.
5636 wakaba 1.142 accept => '',
5637 wakaba 1.149 'accept-charset' => $HTMLCharsetsAttrChecker,
5638     ## NOTE: To which states it applies is not defined in RFC 2070.
5639 wakaba 1.142 action => '',
5640 wakaba 1.150 align => '',
5641 wakaba 1.141 alt => '',
5642 wakaba 1.142 autocomplete => '',
5643 wakaba 1.165 autofocus => $AutofocusAttrChecker,
5644     ## NOTE: <input type=hidden disabled> is not disallowed.
5645 wakaba 1.142 checked => '',
5646     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
5647 wakaba 1.165 ## NOTE: <input type=hidden disabled> is not disallowed.
5648 wakaba 1.142 enctype => '',
5649     form => $HTMLFormAttrChecker,
5650 wakaba 1.150 inputmode => '',
5651     ismap => '', ## NOTE: "MUST" be type=image [HTML4]
5652 wakaba 1.142 list => '',
5653     max => '',
5654     maxlength => '',
5655     method => '',
5656     min => '',
5657 wakaba 1.156 multiple => '',
5658 wakaba 1.165 name => $FormControlNameAttrChecker,
5659 wakaba 1.166 novalidate => '',
5660 wakaba 1.149 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
5661     onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
5662     oninput => $HTMLEventHandlerAttrChecker, # [WF2]
5663     oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
5664     ## TODO: tests for four attributes above
5665 wakaba 1.142 pattern => '',
5666 wakaba 1.156 placeholder => '',
5667 wakaba 1.142 readonly => '',
5668 wakaba 1.150 replace => '',
5669 wakaba 1.142 required => '',
5670     size => '',
5671     src => '',
5672     step => '',
5673     target => '',
5674 wakaba 1.140 type => $GetHTMLEnumeratedAttrChecker->({
5675 wakaba 1.156 hidden => 1, text => 1, search => 1, url => 1,
5676     email => 1, password => 1,
5677 wakaba 1.141 datetime => 1, date => 1, month => 1, week => 1, time => 1,
5678 wakaba 1.157 'datetime-local' => 1, number => 1, range => 1, color => 1,
5679     checkbox => 1,
5680 wakaba 1.141 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
5681     button => 1,
5682 wakaba 1.140 }),
5683 wakaba 1.151 usemap => '',
5684 wakaba 1.142 value => '',
5685 wakaba 1.140 }->{$attr_ln};
5686 wakaba 1.141
5687     ## State-dependent checkers
5688     unless ($checker) {
5689     if ($state eq 'hidden') {
5690     $checker =
5691     {
5692 wakaba 1.142 value => sub {
5693     my ($self, $attr, $item, $element_state) = @_;
5694 wakaba 1.145 my $name = $item->{node}->get_attribute_ns (undef, 'name');
5695 wakaba 1.142 if (defined $name and $name eq '_charset_') { ## case-sensitive
5696     $self->{onerror}->(node => $attr,
5697     type => '_charset_ value',
5698     level => $self->{level}->{must});
5699     }
5700     },
5701 wakaba 1.141 }->{$attr_ln} || $checker;
5702 wakaba 1.142 ## TODO: Warn if no name attribute?
5703     ## TODO: Warn if name!=_charset_ and no value attribute?
5704 wakaba 1.168 } elsif ({
5705     datetime => 1, date => 1, month => 1, time => 1,
5706     week => 1, 'datetime-local' => 1,
5707     }->{$state}) {
5708     my $v = {
5709     datetime => ['global_date_and_time_string'],
5710     date => ['date_string'],
5711     month => ['month_string'],
5712     week => ['week_string'],
5713     time => ['time_string'],
5714     'datetime-local' => ['local_date_and_time_string'],
5715     }->{$state};
5716 wakaba 1.144 $checker =
5717     {
5718 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5719     on => 1, off => 1,
5720     }),
5721 wakaba 1.158 list => $ListAttrChecker,
5722 wakaba 1.168 min => $GetDateTimeAttrChecker->($v->[0]),
5723     max => $GetDateTimeAttrChecker->($v->[0]),
5724 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5725 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5726 wakaba 1.148 step => $StepAttrChecker,
5727 wakaba 1.168 value => $GetDateTimeAttrChecker->($v->[0]),
5728 wakaba 1.144 }->{$attr_ln} || $checker;
5729     } elsif ($state eq 'number') {
5730     $checker =
5731     {
5732 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5733     on => 1, off => 1,
5734     }),
5735 wakaba 1.158 list => $ListAttrChecker,
5736 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5737     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5738 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5739 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5740 wakaba 1.148 step => $StepAttrChecker,
5741 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5742 wakaba 1.144 }->{$attr_ln} || $checker;
5743     } elsif ($state eq 'range') {
5744     $checker =
5745     {
5746 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5747     on => 1, off => 1,
5748     }),
5749 wakaba 1.158 list => $ListAttrChecker,
5750 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5751     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5752 wakaba 1.148 step => $StepAttrChecker,
5753 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5754 wakaba 1.144 }->{$attr_ln} || $checker;
5755 wakaba 1.157 } elsif ($state eq 'color') {
5756     $checker =
5757     {
5758     autocomplete => $GetHTMLEnumeratedAttrChecker->({
5759     on => 1, off => 1,
5760     }),
5761 wakaba 1.158 list => $ListAttrChecker,
5762 wakaba 1.157 value => sub {
5763     my ($self, $attr) = @_;
5764     unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
5765     $self->{onerror}->(node => $attr,
5766     type => 'scolor:syntax error', ## TODOC: type
5767     level => $self->{level}->{must});
5768     }
5769     },
5770     }->{$attr_ln} || $checker;
5771 wakaba 1.144 } elsif ($state eq 'checkbox' or $state eq 'radio') {
5772     $checker =
5773     {
5774 wakaba 1.149 checked => $GetHTMLBooleanAttrChecker->('checked'),
5775     ## TODO: tests
5776 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5777 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5778     }->{$attr_ln} || $checker;
5779     ## TODO: There MUST be another input type=radio with same
5780     ## name (Radio state).
5781     ## ISSUE: There should be exactly one type=radio with checked?
5782     } elsif ($state eq 'file') {
5783     $checker =
5784     {
5785 wakaba 1.161 accept => $AcceptAttrChecker,
5786 wakaba 1.168 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
5787 wakaba 1.159 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
5788 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5789 wakaba 1.144 }->{$attr_ln} || $checker;
5790     } elsif ($state eq 'submit') {
5791     $checker =
5792     {
5793 wakaba 1.149 action => $HTMLURIAttrChecker,
5794 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5795     'application/x-www-form-urlencoded' => 1,
5796     'multipart/form-data' => 1,
5797     'text/plain' => 1,
5798     }),
5799 wakaba 1.149 method => $GetHTMLEnumeratedAttrChecker->({
5800     get => 1, post => 1, put => 1, delete => 1,
5801     }),
5802 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5803 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5804     document => 1, values => 1,
5805     }),
5806     target => $HTMLTargetAttrChecker,
5807 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5808     }->{$attr_ln} || $checker;
5809     } elsif ($state eq 'image') {
5810     $checker =
5811     {
5812 wakaba 1.149 action => $HTMLURIAttrChecker,
5813     align => $GetHTMLEnumeratedAttrChecker->({
5814     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
5815     }),
5816 wakaba 1.144 alt => sub {
5817     my ($self, $attr) = @_;
5818     my $value = $attr->value;
5819     unless (length $value) {
5820     $self->{onerror}->(node => $attr,
5821     type => 'empty anchor image alt',
5822     level => $self->{level}->{must});
5823     }
5824     },
5825 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5826     'application/x-www-form-urlencoded' => 1,
5827     'multipart/form-data' => 1,
5828     'text/plain' => 1,
5829     }),
5830 wakaba 1.149 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
5831     method => $GetHTMLEnumeratedAttrChecker->({
5832     get => 1, post => 1, put => 1, delete => 1,
5833     }),
5834 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5835 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5836     document => 1, values => 1,
5837     }),
5838 wakaba 1.144 src => $HTMLURIAttrChecker,
5839     ## TODO: There is requirements on the referenced resource.
5840 wakaba 1.149 target => $HTMLTargetAttrChecker,
5841     usemap => $HTMLUsemapAttrChecker,
5842 wakaba 1.144 }->{$attr_ln} || $checker;
5843     ## TODO: alt & src are required.
5844     } elsif ({
5845     reset => 1, button => 1,
5846     ## NOTE: From Web Forms 2.0:
5847     remove => 1, 'move-up' => 1, 'move-down' => 1,
5848     add => 1,
5849     }->{$state}) {
5850     $checker =
5851     {
5852     ## NOTE: According to Web Forms 2.0, |input| attribute
5853     ## has |template| attribute to support the |add| button
5854     ## type (as part of the repetition template feature). It
5855     ## conflicts with the |template| global attribute
5856     ## introduced as part of the data template feature.
5857     ## NOTE: |template| attribute as defined in Web Forms 2.0
5858     ## has no author requirement.
5859     value => sub { }, ## NOTE: No restriction.
5860     }->{$attr_ln} || $checker;
5861 wakaba 1.156 } else { # Text, Search, E-mail, URL, Password
5862 wakaba 1.141 $checker =
5863     {
5864 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5865     on => 1, off => 1,
5866     }),
5867 wakaba 1.149 ## TODO: inputmode [WF2]
5868 wakaba 1.158 list => $ListAttrChecker,
5869 wakaba 1.147 maxlength => sub {
5870     my ($self, $attr, $item, $element_state) = @_;
5871    
5872     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
5873    
5874 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
5875 wakaba 1.147 ## NOTE: Applying the rules for parsing non-negative
5876     ## integers results in a number.
5877     my $max_allowed_value_length = 0+$1;
5878    
5879     my $value = $item->{node}->get_attribute_ns (undef, 'value');
5880     if (defined $value) {
5881     my $codepoint_length = length $value;
5882 wakaba 1.162
5883 wakaba 1.147 if ($codepoint_length > $max_allowed_value_length) {
5884     $self->{onerror}
5885     ->(node => $item->{node}
5886     ->get_attribute_node_ns (undef, 'value'),
5887     type => 'value too long',
5888     level => $self->{level}->{must});
5889     }
5890     }
5891     }
5892     },
5893 wakaba 1.160 pattern => $PatternAttrChecker,
5894 wakaba 1.159 placeholder => sub {
5895     my ($self, $attr) = @_;
5896     if ($attr->value =~ /[\x0D\x0A]/) {
5897     $self->{onerror}->(node => $attr,
5898     type => 'newline in value', ## TODOC: type
5899     level => $self->{level}->{must});
5900     }
5901     },
5902 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5903 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5904 wakaba 1.147 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
value => sub {
  ## Checker for the |value| attribute in the Text, Search, URL,
  ## E-mail, and Password states.  |$state| is the lowercased |type|
  ## attribute value captured from the enclosing |check_attrs|.
  ##
  ##   - url:   delegated to |$HTMLURIAttrChecker|.
  ##   - email: one address, or a comma-separated list of addresses if
  ##            the element has a |multiple| attribute.
  ##   - other: the value must not contain U+000D or U+000A.
  my ($self, $attr, $item, $element_state) = @_;
  if ($state eq 'url') {
    $HTMLURIAttrChecker->(@_);
  } elsif ($state eq 'email') {
    if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
      ## Limit of -1 keeps trailing empty fields, so |a@b,| is
      ## reported as containing an empty (invalid) address.
      my @addr = split /,/, $attr->value, -1;
      @addr = ('') unless @addr;
      for my $token (@addr) {
        ## Strip ALL leading and trailing space characters from each
        ## token.  (The trailing pattern previously lacked the |+|
        ## quantifier and removed only a single character, so a token
        ## followed by two or more spaces was wrongly rejected.)
        $token =~ s/\A[\x09\x0A\x0C\x0D\x20]+//;
        $token =~ s/[\x09\x0A\x0C\x0D\x20]+\z//;
        unless ($token =~ /\A$ValidEmailAddress\z/) {
          $self->{onerror}->(node => $attr,
                             type => 'email:syntax error', ## TODO: type
                             value => $token,
                             level => $self->{level}->{must});
        }
      }
    } else {
      unless ($attr->value =~ /\A$ValidEmailAddress\z/) {
        $self->{onerror}->(node => $attr,
                           type => 'email:syntax error', ## TODO: type
                           level => $self->{level}->{must});
      }
    }
  } else {
    if ($attr->value =~ /[\x0D\x0A]/) {
      $self->{onerror}->(node => $attr,
                         type => 'newline in value', ## TODO: type
                         level => $self->{level}->{must});
    }
  }
},
5938 wakaba 1.141 }->{$attr_ln} || $checker;
5939 wakaba 1.147 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
5940 wakaba 1.156 $checker = $GetHTMLBooleanAttrChecker->('multiple')
5941     if $state eq 'email' and $attr_ln eq 'multiple';
5942 wakaba 1.161
5943     if ($item->{node}->has_attribute_ns (undef, 'pattern') and
5944     not $item->{node}->has_attribute_ns (undef, 'title')) {
5945     $self->{onerror}->(node => $item->{node},
5946     type => 'attribute missing',
5947     text => 'title',
5948     level => $self->{level}->{should});
5949     }
5950 wakaba 1.141 }
5951     }
5952    
5953     if (defined $checker) {
5954     if ($checker eq '') {
5955     $checker = sub {
5956     my ($self, $attr) = @_;
5957     $self->{onerror}->(node => $attr,
5958     type => 'input attr not applicable',
5959     text => $state,
5960     level => $self->{level}->{must});
5961     };
5962     }
5963 wakaba 1.140 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
5964     $attr_ln !~ /[A-Z]/) {
5965     $checker = $HTMLDatasetAttrChecker;
5966     $status = $HTMLDatasetAttrStatus;
5967     } else {
5968     $checker = $HTMLAttrChecker->{$attr_ln};
5969     }
5970     }
5971     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
5972     || $AttrChecker->{$attr_ns}->{''};
5973     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
5974     || $AttrStatus->{$attr_ns}->{''};
5975     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
5976 wakaba 1.157
5977 wakaba 1.140 if ($checker) {
5978     $checker->($self, $attr, $item, $element_state) if ref $checker;
5979     } elsif ($attr_ns eq '' and not $status) {
5980     #
5981     } else {
5982     $self->{onerror}->(node => $attr,
5983     type => 'unknown attribute',
5984     level => $self->{level}->{uncertain});
5985     ## ISSUE: No comformance createria for unknown attributes in the spec
5986     }
5987    
5988     $self->_attr_status_info ($attr, $status);
5989     }
5990 wakaba 1.168
5991     ## ISSUE: -0/+0
5992    
5993     if ($state eq 'range') {
5994     $element_state->{number_value}->{min} ||= 0;
5995     $element_state->{number_value}->{max} = 100
5996     unless defined $element_state->{number_value}->{max};
5997     }
5998    
5999     if (defined $element_state->{date_value}->{min} or
6000     defined $element_state->{date_value}->{max}) {
6001     my $min_value = $element_state->{date_value}->{min};
6002     my $max_value = $element_state->{date_value}->{max};
6003     my $value_value = $element_state->{date_value}->{value};
6004    
6005     if (defined $min_value and $min_value eq '' and
6006     (defined $max_value or defined $value_value)) {
6007     my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6008     $self->{onerror}->(node => $min,
6009     type => 'date value not supported', ## TODOC: type
6010     value => $min->value,
6011     level => $self->{level}->{unsupported});
6012     undef $min_value;
6013     }
## An empty string in |$max_value| is reported as a date value that
## is not supported (NOTE(review): presumably stored by the |max|
## attribute checker; confirm).  As with |min| and |value|, it is
## reported and discarded only when there is another operand (|min|
## or |value|) it would have to be compared with.
if (defined $max_value and $max_value eq '' and
    (defined $min_value or defined $value_value)) {
  my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
  $self->{onerror}->(node => $max,
                     type => 'date value not supported', ## TODOC: type
                     value => $max->value,
                     level => $self->{level}->{unsupported});
  ## Drop the value so that the range comparisons are skipped.
  undef $max_value;
}
6023     if (defined $value_value and $value_value eq '' and
6024     (defined $max_value or defined $min_value)) {
6025     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6026     $self->{onerror}->(node => $value,
6027     type => 'date value not supported', ## TODOC: type
6028     value => $value->value,
6029     level => $self->{level}->{unsupported});
6030     undef $value_value;
6031     }
6032    
6033     if (defined $min_value and defined $max_value) {
6034     if ($min_value->to_html5_number > $max_value->to_html5_number) {
6035     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6036     $self->{onerror}->(node => $max,
6037     type => 'max lt min', ## TODOC: type
6038     level => $self->{level}->{must});
6039     }
6040     }
6041    
6042     if (defined $min_value and defined $value_value) {
6043     if ($min_value->to_html5_number > $value_value->to_html5_number) {
6044     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6045     $self->{onerror}->(node => $value,
6046     type => 'value lt min', ## TODOC: type
6047     level => $self->{level}->{warn});
6048     ## NOTE: Not an error.
6049     }
6050     }
6051    
6052     if (defined $max_value and defined $value_value) {
6053     if ($max_value->to_html5_number < $value_value->to_html5_number) {
6054     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6055     $self->{onerror}->(node => $value,
6056     type => 'value gt max', ## TODOC: type
6057     level => $self->{level}->{warn});
6058     ## NOTE: Not an error.
6059     }
6060     }
6061     } elsif (defined $element_state->{number_value}->{min} or
6062     defined $element_state->{number_value}->{max}) {
6063     my $min_value = $element_state->{number_value}->{min};
6064     my $max_value = $element_state->{number_value}->{max};
6065     my $value_value = $element_state->{number_value}->{value};
6066    
6067     if (defined $min_value and defined $max_value) {
6068     if ($min_value > $max_value) {
6069     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6070     $self->{onerror}->(node => $max,
6071     type => 'max lt min', ## TODOC: type
6072     level => $self->{level}->{must});
6073     }
6074     }
6075    
6076     if (defined $min_value and defined $value_value) {
6077     if ($min_value > $value_value) {
6078     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6079     $self->{onerror}->(node => $value,
6080     type => 'value lt min', ## TODOC: type
6081     level => $self->{level}->{warn});
6082     ## NOTE: Not an error.
6083     }
6084     }
6085    
6086     if (defined $max_value and defined $value_value) {
6087     if ($max_value < $value_value) {
6088     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6089     $self->{onerror}->(node => $value,
6090     type => 'value gt max', ## TODOC: type
6091     level => $self->{level}->{warn});
6092     ## NOTE: Not an error.
6093     }
6094     }
6095     }
6096 wakaba 1.150
6097 wakaba 1.168 ## TODO: Warn unless value = min * x where x is an integer.
6098    
6099 wakaba 1.150 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6100     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6101     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6102     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6103     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6104 wakaba 1.140 },
check_start => sub {
  ## Start-of-element hook: registers this control with an enclosing
  ## |label| (if any) and marks the element's ID as labelable.
  my ($self, $item, $element_state) = @_;

  ## |has_labelable| is true when the enclosing |label| already has
  ## its labeled control; another labelable element is then an error.
  ## Otherwise the flag is set to 2 to record that one has been seen.
  my $label_occupied = ($self->{flag}->{has_label} and
                        $self->{flag}->{has_labelable});
  if (not $label_occupied) {
    $self->{flag}->{has_labelable} = 2;
  } else {
    $self->{onerror}->(node => $item->{node},
                       type => 'multiple labelable fae',
                       level => $self->{level}->{must});
  }

  $element_state->{id_type} = 'labelable';
},
6117 wakaba 1.52 };
6118    
6119 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
6120    
6121 wakaba 1.80 ## NOTE: "authors who are nesting repetition blocks should position such
6122     ## [repetition-block-related] buttons carefully to make clear which block a
6123 wakaba 1.150 ## button applies to." [WF2]: I have no idea how this can be tested.
6124 wakaba 1.80
## |button| element: phrasing content model, attribute checkers and
## attribute status table, plus registration as a labelable element.
$Element->{$HTML_NS}->{button} = {
  %HTMLPhrasingContentChecker, ## ISSUE: -interactive?
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## ISSUE: In HTML5, no "MUST NOT" for using |action|, |method|,
    ## |enctype|, |target|, and |novalidate| with non-|submit|-|type|
    ## |button| elements.
    action       => $HTMLURIAttrChecker,
    autofocus    => $AutofocusAttrChecker,
    disabled     => $GetHTMLBooleanAttrChecker->('disabled'),
    enctype      => $GetHTMLEnumeratedAttrChecker->({
      'application/x-www-form-urlencoded' => 1,
      'multipart/form-data' => 1,
      'text/plain' => 1,
    }),
    form         => $HTMLFormAttrChecker,
    method       => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    name         => $FormControlNameAttrChecker,
    novalidate   => $GetHTMLBooleanAttrChecker->('novalidate'),
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput  => $HTMLEventHandlerAttrChecker, ## TODO: tests
    replace      => $GetHTMLEnumeratedAttrChecker->({
      document => 1, values => 1,
    }),
    target       => $HTMLTargetAttrChecker,
    ## NOTE: According to Web Forms 2.0, |button| attribute has |template|
    ## attribute to support the |add| button type (as part of repetition
    ## template feature).  It conflicts with the |template| global attribute
    ## introduced as part of the data template feature.
    ## NOTE: |template| attribute as defined in Web Forms 2.0 has no
    ## author requirement.
    type         => $GetHTMLEnumeratedAttrChecker->({
      button => 1, submit => 1, reset => 1,
    }),
    value        => sub {}, ## NOTE: No restriction.
  }, {
    ## Shared status tables come first so that the entries below
    ## override them.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey    => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
    action       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    autofocus    => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    disabled     => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    enctype      => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form         => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    method       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    novalidate   => FEATURE_HTML5_DEFAULT,
    onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput  => FEATURE_WF2_INFORMATIVE,
    replace      => FEATURE_WF2,
    tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    template     => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO: dropped
    type         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    value        => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## If the enclosing |label| already has its labeled control, one
    ## more labelable element is an error.  Otherwise record (as 2)
    ## that a labelable element has been seen.
    my $label_occupied = ($self->{flag}->{has_label} and
                          $self->{flag}->{has_labelable});
    if (not $label_occupied) {
      $self->{flag}->{has_labelable} = 2;
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "The value attribute must not be present unless the form
    ## [content] attribute is present.": Wrong?

    ## URI-valued attributes and the kind of reference each one makes.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    for my $uri_attr (qw/datasrc template ref/) {
      $element_state->{uri_info}->{$uri_attr}->{type}->{resource} = 1;
    }

    $element_state->{id_type} = 'labelable';
  },
};
6208    
## |label| element: phrasing content that must not contain another
## |label|, and that tracks (through |$self->{flag}|) whether it has
## already been associated with a labelable element.
$Element->{$HTML_NS}->{label} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC
      | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->({
    for => sub {
      my ($self, $attr) = @_;

      ## NOTE: MUST be an ID of a labelable element.  The reference is
      ## only queued in |$self->{idref}| here.
      push @{$self->{idref}}, ['labelable', $attr->value, $attr];
    },
    form => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_WF2 | FEATURE_M12N10_REC,
    for       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    form      => FEATURE_HTML5_DEFAULT,
    lang      => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    onblur    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Nested |label| elements are not allowed.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {label => 1}});

    ## Save the outer flags so that |check_end| can restore them.
    $element_state->{has_label_original} = $self->{flag}->{has_label};
    $self->{flag}->{has_label} = 1;
    $element_state->{has_labelable_original} = $self->{flag}->{has_labelable};

    ## 1 = the control is designated by |for|; 0 = none so far.
    ## (Labelable descendants later set the flag to 2.)
    if ($item->{node}->has_attribute_ns (undef, 'for')) {
      $self->{flag}->{has_labelable} = 1;
    } else {
      $self->{flag}->{has_labelable} = 0;
    }

    for my $uri_attr (qw/template ref/) {
      $element_state->{uri_info}->{$uri_attr}->{type}->{resource} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    if ($self->{flag}->{has_labelable} == 1) { # has for="" but no labelable
      $self->{flag}->{has_labelable}
          = $element_state->{has_labelable_original};
    }
    if (not $element_state->{has_label_original}) {
      delete $self->{flag}->{has_label};
    }
    ## TODO: Warn if no labelable descendant? <input type=hidden>?

    ## NOTE: |<label for=a><input id=a></label>| is non-conforming.

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
  ## TODO: Tests for <nest/> in <label>
};
6263    
## |select| element: labelable form control whose content model is
## |(option | optgroup)*| with no significant text.
$Element->{$HTML_NS}->{select} = {
  %HTMLChecker,
  ## ISSUE: HTML5 has no requirement like these:
  ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
  ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->({
    autofocus    => $AutofocusAttrChecker,
    disabled     => $GetHTMLBooleanAttrChecker->('disabled'),
    data         => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    form         => $HTMLFormAttrChecker,
    multiple     => $GetHTMLBooleanAttrChecker->('multiple'),
    name         => $FormControlNameAttrChecker,
    ## TODO: tests for on*
    onformchange => $HTMLEventHandlerAttrChecker,
    onforminput  => $HTMLEventHandlerAttrChecker,
    oninput      => $HTMLEventHandlerAttrChecker,
    oninvalid    => $HTMLEventHandlerAttrChecker,
    size         => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
  }, {
    ## Shared status tables come first so that the entries below
    ## override them.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey    => FEATURE_HTML5_FD | FEATURE_WF2,
    autofocus    => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    data         => FEATURE_WF2,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    disabled     => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    form         => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    multiple     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    name         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput  => FEATURE_WF2_INFORMATIVE,
    onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninput      => FEATURE_WF2,
    oninvalid    => FEATURE_WF2,
    sdaform      => FEATURE_HTML20_RFC,
    sdapref      => FEATURE_HTML20_RFC,
    size         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## If the enclosing |label| already has its labeled control, one
    ## more labelable element is an error.  Otherwise record (as 2)
    ## that a labelable element has been seen.
    my $label_occupied = ($self->{flag}->{has_label} and
                          $self->{flag}->{has_labelable});
    if (not $label_occupied) {
      $self->{flag}->{has_labelable} = 2;
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}->{must});
    }

    for my $uri_attr (qw/data datasrc template ref/) {
      $element_state->{uri_info}->{$uri_attr}->{type}->{resource} = 1;
    }

    $element_state->{id_type} = 'labelable';
  },
  check_child_element => sub {
    ## NOTE: (option | optgroup)*

    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded by an ancestor.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($child_nsuri eq $HTML_NS and
             ($child_ln eq 'option' or $child_ln eq 'optgroup')) {
      ## Part of the content model.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only inter-element whitespace is allowed.
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
6357 wakaba 1.1
## |datalist| element: content is either phrasing content or a run of
## |option| elements.  |$element_state->{phase}| remembers which of
## the two alternatives the first significant child selected.
$Element->{$HTML_NS}->{datalist} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
  }, {
    %HTMLAttrStatus,
    data => FEATURE_WF2,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{phase} = 'any'; # any | phrasing | option

    for my $uri_attr (qw/data template ref/) {
      $element_state->{uri_info}->{$uri_attr}->{type}->{resource} = 1;
    }

    $element_state->{id_type} = 'datalist';
  },
  ## NOTE: phrasing | option*
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded by an ancestor.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } else {
      my $phase = $element_state->{phase};
      if ($phase eq 'phrasing') {
        unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed:phrasing',
                             level => $self->{level}->{must});
        }
      } elsif ($phase eq 'option') {
        unless ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed',
                             level => $self->{level}->{must});
        }
      } elsif ($phase eq 'any') {
        ## The first child decides which alternative is in effect.
        if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
          $element_state->{phase} = 'phrasing';
        } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
          $element_state->{phase} = 'option';
        } else {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed',
                             level => $self->{level}->{must});
        }
      } else {
        die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      my $phase = $element_state->{phase};
      if ($phase eq 'phrasing') {
        ## Text is allowed in phrasing content.
      } elsif ($phase eq 'any') {
        ## Significant text selects the phrasing alternative.
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{phase} eq 'phrasing') {
      ## Phrasing content is expected to have significant content,
      ## unless the element is treated as transparent.
      if ($element_state->{has_significant}) {
        $item->{real_parent_state}->{has_significant} = 1;
      } elsif ($item->{transparent}) {
        #
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'no significant content',
                           level => $self->{level}->{should});
      }
    } else {
      ## NOTE: Since the content model explicitly allows a |datalist| element
      ## to be empty, we don't raise a "no significant content" error for
      ## this element when there is no element.  (We should raise an error
      ## for |<datalist><br></datalist>|, however.)
      ## NOTE: As a side effect, when the |datalist| element only contains
      ## non-conforming content, the |phase| flag has not changed from
      ## |any| and no "no significant content" error is raised either.
      $HTMLChecker{check_end}->(@_);
    }
  },
};
6457 wakaba 1.49
## |optgroup| element: requires a |label| attribute and allows only
## |option| element children (no significant text).
$Element->{$HTML_NS}->{optgroup} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label    => sub {},
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    label    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    ## The |label| attribute is mandatory.
    if (not $item->{node}->has_attribute_ns (undef, 'label')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'label',
                         level => $self->{level}->{must});
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded by an ancestor.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
      ## Part of the content model.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only inter-element whitespace is allowed.
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
6506    
## |option| element: text-only content (via |%HTMLTextChecker|) with
## its element-specific attribute checkers and status table.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label    => sub {}, ## NOTE: No restriction.
    ## ISSUE: Not a "boolean attribute"
    selected => $GetHTMLBooleanAttrChecker->('selected'),
    value    => sub {}, ## NOTE: No restriction.
  }, {
    ## Shared status tables come first so that the entries below
    ## override them.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    label    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform  => FEATURE_HTML20_RFC,
    sdapref  => FEATURE_HTML20_RFC,
    selected => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    value    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
};
6527 wakaba 1.49
## |textarea| element: text-only content (via |%HTMLTextChecker|),
## labelable form control.  Besides the per-attribute checkers,
## |check_attrs2| enforces two cross-attribute rules: |title| SHOULD
## accompany |pattern| (WF2), and |cols| MUST be present when
## |wrap=hard|.
$Element->{$HTML_NS}->{textarea} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
    autofocus => $AutofocusAttrChecker,
    cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    form => $HTMLFormAttrChecker,
    ## TODO: inputmode [WF2]
    maxlength => sub {
      ## Checks the syntax of |maxlength| and, when it parses as a
      ## number, that the element's text content is not longer than
      ## that number of characters.
      my ($self, $attr, $item, $element_state) = @_;

      $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);

      if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
        ## NOTE: Applying the rules for parsing non-negative integers
        ## results in a number.
        my $max_allowed_value_length = 0+$1;

        ## ISSUE: "The the purposes of this requirement," (typo)

        ## ISSUE: This constraint is applied w/o CRLF normalization to
        ## |value| attribute, but w/ CRLF normalization to
        ## concept-value.
        my $value = $item->{node}->text_content;
        if (defined $value) {
          my $codepoint_length = length $value;

          if ($codepoint_length > $max_allowed_value_length) {
            $self->{onerror}->(node => $item->{node},
                               type => 'value too long',
                               level => $self->{level}->{must});
          }
        }
      }
    },
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
    ## |oninput| is registered exactly once; a hash constructor keeps
    ## only the last value given for a repeated key.
    oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
    pattern => $PatternAttrChecker,
    readonly => $GetHTMLBooleanAttrChecker->('readonly'),
    required => $GetHTMLBooleanAttrChecker->('required'),
    rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
    ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
    wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
  }, {
    ## Shared status tables come first so that the entries below
    ## override them.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
    'accept-charset' => FEATURE_HTML2X_RFC,
    accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
    autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    cols => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
    onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
    oninput => FEATURE_WF2, ## TODO: tests
    oninvalid => FEATURE_WF2, ## TODO: tests
    onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
    readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    rows => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    wrap => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## If the enclosing |label| already has its labeled control, one
    ## more labelable element is an error.  Otherwise record (as 2)
    ## that a labelable element has been seen.
    if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}->{must});
    } else {
      $self->{flag}->{has_labelable} = 2;
    }

    $element_state->{uri_info}->{data}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;

    $element_state->{id_type} = 'labelable';
  },
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    if ($item->{node}->has_attribute_ns (undef, 'pattern') and
        not $item->{node}->has_attribute_ns (undef, 'title')) {
      ## NOTE: WF2 (dropped by HTML5)
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'title',
                         level => $self->{level}->{should});
    }

    ## |wrap=hard| requires the |cols| attribute.
    unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
      my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
      if (defined $wrap) {
        $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
        if ($wrap eq 'hard') {
          $self->{onerror}->(node => $item->{node},
                             type => 'attribute missing',
                             text => 'cols',
                             level => $self->{level}->{must});
        }
      }
    }
  },
};
6653 wakaba 1.49
## |output| element: phrasing content.  Its |for| attribute is a set of
## ID references; |form|, |name| and the form event handler attributes
## are delegated to the shared checkers.
$Element->{$HTML_NS}->{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    ## NOTE: "Unordered set of unique space-separated tokens".  The first
    ## occurrence of each token is queued in |$self->{idref}| as an ID
    ## reference of type "any"; any later occurrence of the same token is
    ## reported as a "duplicate token" error.
    for => sub {
      my ($self, $attr) = @_;

      my %seen;
      my @tokens = grep { length $_ }
          split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
      for my $token (@tokens) {
        if ($seen{$token}) {
          $self->{onerror}->(node => $attr, type => 'duplicate token',
                             value => $token,
                             level => $self->{level}->{must});
        } else {
          $seen{$token} = 1;
          push @{$self->{idref}}, ['any', $token, $attr];
        }
      }
    },
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
  }, {
    %HTMLAttrStatus,
    for => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  }),
};
6690    
## |isindex| element: empty element, deprecated (HTML4); use of it is
## flagged at the "deprecated, SHOULD" level.
$Element->{$HTML_NS}->{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->({
    prompt => sub {}, ## NOTE: Text [M12N]
  }, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record how URLs found in each attribute are used.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{action}->{type}->{action} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
6718 wakaba 1.49
## |script| element.
##
## With a |src| attribute the element must be empty.  Without it, the
## script type is derived from |type| / |language|, the text content is
## accumulated in |$element_state->{text}|, and at the end of the element
## that text is handed to |$self->{onsubdoc}| (unless the type looks like
## an XML type, which is only reported at the "uncertain" level).
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |charset| is only allowed together with |src|; the value itself is
    ## validated by the shared charset checker.
    charset => sub {
      my ($self, $attr) = @_;

      if (not $attr->owner_element->has_attribute_ns (undef, 'src')) {
        $self->{onerror}->(type => 'attribute not allowed',
                           node => $attr,
                           level => $self->{level}->{must});
      }

      $HTMLCharsetChecker->($attr->value, @_);
    },
    language => sub {}, ## NOTE: No syntax constraint according to HTML4.
    src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
  }, {
    %HTMLAttrStatus,
    async => FEATURE_HTML5_WD,
    charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    href => FEATURE_RDFA_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    if ($node->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $node->get_attribute_ns (undef, 'type');
      my $language = $node->get_attribute_ns (undef, 'language');

      ## An empty |type| or |language|, or the absence of both, means
      ## JavaScript.  Otherwise |type| wins, and |language| alone is
      ## mapped to "text/<language>".
      my $type_is_empty = (defined $type and $type eq '');
      my $language_is_empty = (defined $language and $language eq '');
      if ($type_is_empty or $language_is_empty or
          not (defined $type or defined $language)) {
        $type = 'text/javascript';
      } elsif (not defined $type) {
        $type = 'text/' . $language;
      }

      ## Reduce a well-formed media type to lowercase "type/subtype".
      if (my ($top_level, $subtype) = $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
        $type = $top_level . '/' . $subtype;
        $type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
        ## TODO: Though we strip parameter here, it should not be ignored for the purpose of conformance checking...
      }
      $element_state->{script_type} = $type;
    }

    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{src}->{type}->{resource} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;

    ## Buffer for the text content, filled by |check_child_text|.
    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } else {
      if ($element_state->{must_be_empty}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:empty',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    if ($element_state->{must_be_empty} and $has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed:empty',
                         level => $self->{level}->{must});
    }

    ## The text is accumulated even when it is not allowed.
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    unless ($element_state->{must_be_empty}) {
      my $script_type = $element_state->{script_type};
      if ($script_type =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML script lang',
                           text => $script_type,
                           level => $self->{level}->{uncertain});
        ## ISSUE: Should we raise some kind of error for
        ## <script type="text/xml">aaaaa</script>?
        ## NOTE: ^^^ This is why we throw an "uncertain" error.
      } else {
        ## Hand the inline script over as a subdocument.
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $script_type,
                             is_char_string => 1});
      }

      $HTMLChecker{check_end}->(@_);
    }
  },
  ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
  ## NOTE: "When used to include script data, the script data must be embedded
  ## inline, the format of the data must be given using the type attribute,
  ## and the src attribute must not be specified." - not testable.
  ## TODO: It would be possible to err <script type=text/plain src=...>
};
6843 wakaba 1.25 ## ISSUE: Significant check and text child node
6844 wakaba 1.1
6845     ## NOTE: When script is disabled.
## |noscript| element.
##
## Outside |head| it behaves as a transparent element that must not
## contain another |noscript|.  Inside |head| (|$self->{flag}->{in_head}|)
## only |link|, non-scoped |style| and |meta| with an |http-equiv| other
## than "content-type" are accepted as children, and significant text is
## an error.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    ## The element is not allowed in XML documents.
    if (not $node->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $node, type => 'in XML:noscript',
                         level => $self->{level}->{must});
    }

    ## Nested |noscript| is forbidden outside |head|.
    if (not $self->{flag}->{in_head}) {
      $self->_add_minus_elements ($element_state,
                                  {$HTML_NS => {noscript => 1}});
    }

    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Outside |head| the transparent content model applies as is.
    unless ($self->{flag}->{in_head}) {
      return $HTMLTransparentChecker{check_child_element}->(@_);
    }

    ## Reports a child that is not allowed in |head| |noscript|.
    my $disallow = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:head noscript',
                         level => $self->{level}->{must});
    };
    my $is_html = ($child_nsuri eq $HTML_NS);

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($is_html and $child_ln eq 'link') {
      ## Always allowed.
    } elsif ($is_html and $child_ln eq 'style') {
      $disallow->() if $child_el->has_attribute_ns (undef, 'scoped');
    } elsif ($is_html and $child_ln eq 'meta') {
      ## |meta| needs an |http-equiv| that is not "content-type".
      my $http_equiv
          = $child_el->get_attribute_node_ns (undef, 'http-equiv');
      ## TODO: case
      if (not $http_equiv or lc ($http_equiv->value) eq 'content-type') {
        $disallow->();
      }
    } else {
      $disallow->();
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    unless ($self->{flag}->{in_head}) {
      return $HTMLTransparentChecker{check_child_text}->(@_);
    }

    if ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    my $end_checker = $self->{flag}->{in_head}
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
};
6937 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
6938 wakaba 1.1
## |event-source| element (hyphenated spelling, marked as dropped):
## empty element whose |src| attribute is a URL.
$Element->{$HTML_NS}->{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_LC_DROPPED,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## URLs in these attributes are all used as resources.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{src}->{type}->{resource} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
6956    
## |eventsource| element: empty element whose |src| attribute is a URL.
$Element->{$HTML_NS}->{eventsource} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## URLs in these attributes are all used as resources.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{src}->{type}->{resource} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
6974    
## |details| element: reuses every hook of the |fieldset| definition and
## overrides only the status and the attribute table (boolean |open|).
$Element->{$HTML_NS}->{details} = {
  %{$Element->{$HTML_NS}->{fieldset}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      open => $GetHTMLBooleanAttrChecker->('open'),
    },
    {
      %HTMLAttrStatus,
      open => FEATURE_HTML5_LC,
    },
  ),
};
6985    
## |datagrid| element: flow content in which |a| and nested |datagrid|
## elements are excluded for the duration of the element.
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Excluded descendants; undone in |check_end|.
    my $excluded = {$HTML_NS => {a => 1, datagrid => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);
    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
7013    
## |command| element: empty element.  |type| must be exactly one of
## "command", "checkbox" or "radio" (compared as is, without case
## folding); |icon| is a URL used as an embedded resource.
$Element->{$HTML_NS}->{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    type => sub {
      my ($self, $attr) = @_;

      my %is_known_type = (command => 1, checkbox => 1, radio => 1);
      my $value = $attr->value;
      if (not $is_known_type{$value}) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                           level => $self->{level}->{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_DROPPED, # HTML5 revision 3067
    disabled => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{icon}->{type}->{embedded} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
7050    
## |bb| element: phrasing content in which the elements listed in
## |$HTMLInteractiveContent| are excluded for the duration of the element.
$Element->{$HTML_NS}->{bb} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1}),
  }, {
    %HTMLAttrStatus,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Undone in |check_end|.
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    ## NOTE(review): The other hooks come from
    ## |%HTMLPhrasingContentChecker| but the end hook of
    ## |%HTMLTransparentChecker| is invoked here - confirm that this
    ## mix is intended.
    $HTMLTransparentChecker{check_end}->(@_);
  },
};
7074    
## |menu| element.
##
## The content is either a sequence of |li| elements or phrasing content,
## but not a mix of both.  |$element_state->{phase}| tracks which form has
## been chosen:
##
##   'li or phrasing' - nothing significant seen yet (initial state);
##   'li'             - an |li| child has been seen;
##   'phrasing'       - a phrasing child or significant text has been seen.
##
## While the element is open, |$self->{flag}->{in_menu}| is set; the
## previous value is saved in |$element_state->{in_menu_original}| and the
## flag is removed at the end unless it was already set.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## ISSUE: <menu id=""><p contextmenu=""> match?  (In the current
    ## implementation, it does not match.)
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_DROPPED,
    ## NOTE: The key has to be |compact|, the name under which the
    ## attribute checker is registered above, so that the attribute
    ## actually receives this status.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{phase} = 'li or phrasing';

    ## Remember the outer value so that nested |menu| elements restore
    ## the flag correctly in |check_end|.
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    $element_state->{id_type} = 'menu';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        ## |li| after phrasing content.
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        ## Phrasing content after |li|.
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      ## Significant text counts as phrasing content.
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
7167    
## |datatemplate| element: may only contain |rule| elements (plus
## whatever an ancestor has added to the "plus" set); significant text is
## an error.  The element may be the root of an XML document.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $must = $self->{level}->{must};

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $must);
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($child_ln eq 'rule' and $child_nsuri eq $HTML_NS) {
      ## The only child element of its own that is allowed.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate',
                         level => $must);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  is_xml_root => 1,
};
7198    
## |rule| element: child elements and text are not checked at all.
## While the element is open, |nest| is added to the "plus" set and
## |$self->{flag}->{in_rule}| is set; both are undone in |check_end|.
$Element->{$HTML_NS}->{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    condition => $HTMLSelectorsAttrChecker,
    mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
  }, {
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $flag = $self->{flag};

    $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});

    ## Remember the outer value so that nested |rule| elements restore
    ## the flag correctly.
    $element_state->{in_rule_original} = $flag->{in_rule};
    $flag->{in_rule} = 1;

    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_plus_elements ($element_state);
    if (not $element_state->{in_rule_original}) {
      delete $self->{flag}->{in_rule};
    }

    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
7234    
## |nest| element: empty element.  |filter| is checked as selectors;
## |mode| must be a non-empty string without any space character.
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    filter => $HTMLSelectorsAttrChecker,
    mode => sub {
      my ($self, $attr) = @_;

      my $mode = $attr->value;
      unless ($mode =~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/) {
        $self->{onerror}->(node => $attr, type => 'mode:syntax error',
                           level => $self->{level}->{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
};
7254    
## |legend| element.
##
## Children and text are checked as flow content when the parent state
## has |in_figure| set, and as phrasing content otherwise.  |figure| is
## excluded for the duration of the element.  The start and end hooks of
## the flow content checker are used in both cases.
$Element->{$HTML_NS}->{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
#    align => $GetHTMLEnumeratedAttrChecker->({
#      top => 1, bottom => 1, left => 1, right => 1,
#    }),
    form => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
    form => FEATURE_HTML5_DROPPED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $content_checker = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker
        : \%HTMLPhrasingContentChecker;
    $content_checker->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    my $content_checker = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker
        : \%HTMLPhrasingContentChecker;
    $content_checker->{check_child_text}->(@_);
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Undone in |check_end|.
    my $excluded = {$HTML_NS => {figure => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);
    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # legend
7301 wakaba 1.1
## |div| element: flow content.  The deprecated |align| attribute accepts
## "left", "center", "right" and "justify".
$Element->{$HTML_NS}->{div} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1,
      center => 1,
      right => 1,
      justify => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## URLs in these attributes are all used as resources.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{datasrc}->{type}->{resource} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
7326    
## |center| element: deprecated flow content container without any
## element-specific attribute checker.
$Element->{$HTML_NS}->{center} = {
  %HTMLFlowContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
7336    
## |font| element: transparent content model; dropped from HTML5 and
## deprecated in HTML4.  No element-specific attribute checker is
## registered yet.
$Element->{$HTML_NS}->{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |size|, |color|, |face|
    },
    {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      color => FEATURE_M12N10_REC_DEPRECATED,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      face => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    },
  ),
  ## NOTE: When the |font| element was defined in the HTML5 specification,
  ## it was allowed only in a document with the WYSIWYG signature.  The
  ## checker does not check whether there is the signature, since the
  ## signature is dropped, too, and has never been implemented.  (In
  ## addition, for any |font| element an "element not defined" error is
  ## raised anyway, such that we don't have to raise an additional error.)
};
7361 wakaba 1.49
## Definition of the |basefont| element in the HTML namespace.
##
## The entry starts from the shared |%HTMLEmptyChecker| table (defined
## elsewhere in this file; presumably the checker for elements with
## empty content) and marks the element itself as deprecated.
##
## No element-specific attribute checkers are registered yet (see the
## TODO below); only per-attribute status flags are given.  The
## commented-out |id| line is an alternative status value retained for
## reference; the active |id| entry is the one that follows it.
7362 wakaba 1.64 $Element->{$HTML_NS}->{basefont} = {
7363     %HTMLEmptyChecker,
7364     status => FEATURE_M12N10_REC_DEPRECATED,
7365     check_attrs => $GetHTMLAttrsChecker->({
7366     ## TODO: color, face, size
7367     }, {
7368     %HTMLAttrStatus,
7369     color => FEATURE_M12N10_REC_DEPRECATED,
7370     face => FEATURE_M12N10_REC_DEPRECATED,
7371 wakaba 1.153 #id => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
7372     id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7373 wakaba 1.64 size => FEATURE_M12N10_REC_DEPRECATED,
7374     }),
7375     };
7376    
7377 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
7378     ## class title id cols rows onload onunload style(x10)
7379     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7380     ## noframes Common, lang(xhtml10)
7381    
7382 wakaba 1.100 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7383 wakaba 1.56
7384 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7385     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7386     ## xmp, listing sdapref [HTML 2.0]
7387    
7388 wakaba 1.56 =pod
7389    
7390 wakaba 1.61 HTML 2.0 nextid @n
7391    
7392     RFC 2659: CERTS CRYPTOPTS
7393    
7394     ISO-HTML: pre-html, divN
7395 wakaba 1.82
7396     XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7397     di (Common), nl (Common), handler (Common, type), standby (Common),
7398     summary (Common)
7399    
7400 wakaba 1.97 Access & XHTML2: access (LC)
7401 wakaba 1.82
7402     XML Events & XForms (for XHTML2 support; very, very low priority)
7403 wakaba 1.61
7404 wakaba 1.56 =cut
7405 wakaba 1.61
7406     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7407     ## We added them only to |a|. |link| and |form| might also allow them
7408     ## in theory.
7409 wakaba 1.1
## Record in the shared |$Whatpm::ContentChecker::Namespace| table that
## the definitions for the HTML namespace have been loaded.
## NOTE(review): presumably consulted by the core checker to avoid
## loading this module again; confirm against |Whatpm::ContentChecker|.
7410     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
7411    
## A module file must end with a true value so that |require| succeeds.
7412     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24