/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.177 - (hide annotations) (download)
Sun Jul 5 23:35:33 2009 UTC (16 years ago) by wakaba
Branch: MAIN
Changes since 1.176: +5 -4 lines
++ whatpm/t/dom-conformance/ChangeLog	5 Jul 2009 23:31:57 -0000
2009-07-06  Wakaba  <wakaba@suika.fam.cx>

	* html-flows-1.dat: |header| in |footer| or |address| is no longer
	allowed (HTML5 revision 3051).

++ whatpm/Whatpm/ContentChecker/ChangeLog	5 Jul 2009 23:34:46 -0000
2009-07-06  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Disallow |header| in |footer| or |address| (HTML5
	revision 3051).

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.117 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6    
7 wakaba 1.1 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8    
9 wakaba 1.174 ## --- Feature Status ---
10    
## Status flags for features that are, or once were, part of HTML5.
## Each constant is a bit combination of a
## |Whatpm::ContentChecker::FEATURE_STATUS_*| value and, for features
## that are still part of the spec, |FEATURE_ALLOWED|.

## Part of HTML5, in the "implemented" status.
sub FEATURE_HTML5_COMPLETE () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}

## Part of HTML5, in the "awaiting implementation feedback" status.
sub FEATURE_HTML5_CR () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## Part of HTML5, in the "last call for comments" status.
sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}

## Part of HTML5, but in the "being considered for removal" status.
## Reported with the same flags as a working draft feature.
sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## Part of HTML5, in the "working draft" status.
sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## Part of HTML5, in the "first draft" status.  Reported with the
## same flags as a working draft feature.
sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## Part of HTML5, but with no status annotation.  Reported with the
## same flags as a working draft feature.
sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## Was part of HTML5 in a status before the last call for comments,
## but has since been dropped (hence no |FEATURE_ALLOWED| bit).
sub FEATURE_HTML5_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## Was part of HTML5 in the last call for comments status, but has
## since been dropped (hence no |FEATURE_ALLOWED| bit).
sub FEATURE_HTML5_LC_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
57 wakaba 1.154
## Web Forms 2.0 (WF2) features.  All three constants carry the last
## call status only.

## Defined in WF2 (whether deprecated or not) and then incorporated
## into the HTML5 spec.
sub FEATURE_WF2X () { Whatpm::ContentChecker::FEATURE_STATUS_LC }

## Introduced or modified in WF2, but not merged into HTML5.
sub FEATURE_WF2 () { Whatpm::ContentChecker::FEATURE_STATUS_LC }

## Mentioned in WF2's informative appendix A, but not merged into
## HTML5.
sub FEATURE_WF2_INFORMATIVE () { Whatpm::ContentChecker::FEATURE_STATUS_LC }

## RDFa features.

## Defined by the RDFa Recommendation.
sub FEATURE_RDFA_REC () { Whatpm::ContentChecker::FEATURE_STATUS_REC }

## Defined in an RDFa last call working draft, but then dropped.
sub FEATURE_RDFA_LC_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
82 wakaba 1.58
## NOTE: The XHTML Role LCWD has almost no information on how the
## |role| attribute can be used.  Its only requirement in that regard
## is: "the attribute MUST be referenced using its namespace-qualified
## form" (and this is a host language conformance requirement!).
sub FEATURE_ROLE_LC () { Whatpm::ContentChecker::FEATURE_STATUS_LC }

## XHTML 2.0 Editor's Draft, in which the namespace URI is
## "http://www.w3.org/1999/xhtml".
sub FEATURE_XHTML2_ED () { Whatpm::ContentChecker::FEATURE_STATUS_WD }

## XHTML Basic 1.1 Recommendation: new features (not in XHTML M12N).
sub FEATURE_XHTMLBASIC11_CR () { Whatpm::ContentChecker::FEATURE_STATUS_REC }

## XHTML Basic 1.1 Recommendation: new but deprecated features.
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO |
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}

## Ruby annotation features.
## NOTE(review): The constant is named "_REC" but maps to
## |FEATURE_STATUS_CR| - confirm that this is intended.
sub FEATURE_RUBY_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## XHTML M12N 1.1 Recommendation: new features (not in 1.0).
sub FEATURE_M12N11_LC () { Whatpm::ContentChecker::FEATURE_STATUS_REC }
117    
## NOTE: The M12N10 status is based on its abstract module definition,
## which contains a number of problems.  (However, again, it's a REC!)

## XHTML M12N 1.0 Recommendation.
## NOTE: Oh, XHTML m12n 1.0 passed the CR phase!  W3C Process sucks!
sub FEATURE_M12N10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_REC }

## XHTML M12N 1.0 Recommendation, deprecated features.
sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO |
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}

## NOTE: The XHTML10 status is based on its transitional and frameset
## DTDs (second edition).  Only attributes missing from the M12N10
## abstract definition are added.
## NOTE(review): The constant is named "_REC" but maps to
## |FEATURE_STATUS_CR| - confirm that this is intended.
sub FEATURE_XHTML10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## ISO-HTML preparation (informative documentation).
## NOTE: Diff from HTML4.
sub FEATURE_ISOHTML_PREPARATION () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## NOTE: The HTML4 status is based on its transitional and frameset
## DTDs (HTML 4.01).  Only attributes missing from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}

## TODO: According to HTML4 definition, authors SHOULD use style sheets
## rather than presentational attributes (deprecated or not deprecated).

## HTML 3.2 features that are obsolete.
## NOTE: Diff from HTML4.
## NOTE: The deprecation is a lowercase normative "should".
sub FEATURE_HTML32_REC_OBSOLETE () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD |
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}

## RFC 2659 (Experimental RFC).
sub FEATURE_RFC2659 () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## HTML 2.x (Proposed Standard, obsolete).
## NOTE: Diff from HTML 2.0 and not in newer versions.
sub FEATURE_HTML2X_RFC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## RFC 1942 (Experimental RFC, obsolete).
## NOTE: Diff from HTML 2.0.
sub FEATURE_RFC1942 () { Whatpm::ContentChecker::FEATURE_STATUS_CR }

## HTML 2.0 (Proposed Standard, obsolete).
## NOTE: Diff from HTML 3.2.
sub FEATURE_HTML20_RFC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
175 wakaba 1.58
176 wakaba 1.174 ## --- Content Model ---
177    
178 wakaba 1.29 ## December 2007 HTML5 Classification
179    
## Metadata content: {namespace URI => {local name => 1}}.
my $HTMLMetadataContent = {
  $HTML_NS => {
    (map { $_ => 1 } qw(
      title base link style script noscript
      event-source eventsource
      command datatemplate
    )),

    ## NOTE: A |meta| with no |name| element is not allowed as
    ## a metadata content other than |head| element.
    meta => 1,
  },

  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
193    
## Flow content: {namespace URI => {local name => 1}}.  The table
## includes every element that is listed as phrasing content.
my $HTMLFlowContent = {
  $HTML_NS => {
    (map { $_ => 1 } qw(
      section nav article blockquote aside
      h1 h2 h3 h4 h5 h6 header
      footer address p hr dialog pre
      ol ul dl figure map table
      form fieldset
      datatemplate
    )),

    ## ISSUE: "Flow element" in spec.
    details => 1, datagrid => 1,

    ## ISSUE: No category in spec.
    div => 1,

    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    style => 1,

    ## Elements that are also phrasing content.  (|datagrid|, listed
    ## above, is among them - ISSUE: "Interactive element" in the
    ## spec.)
    (map { $_ => 1 } qw(
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo ruby
      script noscript event-source eventsource
      command bb
      input button label select datalist
      textarea output
    )),

    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    a => 1, ins => 1, del => 1, font => 1,

    ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, flow.
    menu => 1,

    (map { $_ => 1 } qw(
      img iframe embed object video audio
      canvas
    )),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
236    
## Sectioning content: {namespace URI => {local name => 1}}.
my $HTMLSectioningContent = {
  $HTML_NS => {
    (map { $_ => 1 } qw(section nav article aside)),

    ## NOTE: |body| is only allowed in |html| element.
    body => 1,
  },
};
244    
## Sectioning roots: {namespace URI => {local name => 1}}.
my $HTMLSectioningRoot = {
  $HTML_NS => {map { $_ => 1 } qw(blockquote datagrid figure td)},
};
250    
## Heading content: {namespace URI => {local name => 1}}.
my $HTMLHeadingContent = {
  $HTML_NS => {map { $_ => 1 } qw(h1 h2 h3 h4 h5 h6 header)},
};
256    
## Phrasing content: {namespace URI => {local name => 1}}.
## NOTE: All phrasing content is also flow content.
## NOTE: Non-inter-element-whitespace text nodes are phrasing content
## too, but they cannot be represented in this table.
my $HTMLPhrasingContent = {
  $HTML_NS => {
    (map { $_ => 1 } qw(
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo ruby
      script noscript event-source eventsource
      command bb
      input button label select datalist
      textarea output
    )),

    ## ISSUE: "Interactive element" in the spec.
    datagrid => 1,

    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    a => 1, ins => 1, del => 1, font => 1,

    ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, flow.
    menu => 1,

    (map { $_ => 1 } qw(
      img iframe embed object video audio
      canvas
    )),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
288    
289 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
290 wakaba 1.29
## Interactive content: {namespace URI => {local name => 1}}.  Some
## entries are interactive only under the conditions noted below; the
## table itself does not encode those conditions.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    (map { $_ => 1 } qw(
      a
      label input button select textarea
      details datagrid bb
    )),

    ## NOTE: When "controls" attribute is specified.
    video => 1, audio => 1,

    ## NOTE: When "type=toolbar" attribute is specified.
    menu => 1,
  },
};
304    
## Labelable form-associated elements:
## {namespace URI => {local name => 1}}.
my $LabelableFAE = {
  $HTML_NS => {map { $_ => 1 } qw(input button select textarea)},
};
311    
312 wakaba 1.130 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
313    
314 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
315     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
316    
317     ## -- Common attribute syntax checkers
318    
319 wakaba 1.1 our $AttrChecker;
320 wakaba 1.82 our $AttrStatus;
321 wakaba 1.1
## Returns a checker for an enumerated attribute.
##
## Argument: $states - a hash reference {keyword => 1 or -1}, where
## the keyword is in lowercase, 1 marks a conforming keyword and -1
## marks a keyword that is recognized but non-conforming.
##
## The returned checker takes ($self, $attr) and reports, via
## |$self->{onerror}| at the "must" level, 'enumerated:non-conforming'
## for a -1 keyword and 'enumerated:invalid' for anything that is not
## a key of $states.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;

    ## Keywords are compared ASCII case-insensitively.  Only A-Z are
    ## folded: |lc| would apply Unicode case mapping, under which
    ## e.g. U+212A KELVIN SIGN becomes "k" and a non-ASCII value
    ## could be mistaken for a keyword.
    my $value = $attr->value;
    $value =~ tr/A-Z/a-z/;

    ## Unknown values have no entry; treat them as 0 so that the
    ## numeric comparison never sees |undef|.
    my $state = $states->{$value} || 0;
    if ($state > 0) {
      #
    } elsif ($state) {
      $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming',
                         level => $self->{level}->{must});
    } else {
      $self->{onerror}->(node => $attr, type => 'enumerated:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLEnumeratedAttrChecker
338    
## Returns a checker for a boolean attribute.
##
## Argument: $local_name - the attribute's local name, in lowercase.
##
## The returned checker takes ($self, $attr).  The value is accepted
## if it is the empty string or an ASCII case-insensitive match for
## $local_name; otherwise 'boolean:invalid' is reported via
## |$self->{onerror}| at the "must" level.
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;

    ## Only A-Z are folded: |lc| would apply Unicode case mapping,
    ## under which e.g. U+212A KELVIN SIGN becomes "k" and a
    ## non-ASCII value could be mistaken for the attribute name.
    my $value = $attr->value;
    $value =~ tr/A-Z/a-z/;

    unless ($value eq $local_name or $value eq '') {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLBooleanAttrChecker
350    
## Returns a checker for an unordered set of unique space-separated
## tokens.
##
## Argument: $allowed_words - a hash reference whose true-valued keys
## are the permitted tokens, or |undef| if any token is permitted.
##
## The returned checker takes ($self, $attr).  For each token, in
## order, it reports 'duplicate token' if the token has been seen
## before, or 'word not allowed' if it is seen for the first time and
## is not permitted.  Both are reported at the "must" level.
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my $allowed_words = shift;
  return sub {
    my ($self, $attr) = @_;

    ## Splitting can yield an empty leading field; drop it.
    my @tokens = grep { length }
        split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

    my %seen;
    for my $token (@tokens) {
      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
        next;
      }
      $seen{$token} = 1;

      next unless defined $allowed_words;
      next if $allowed_words->{$token};
      $self->{onerror}->(node => $attr, type => 'word not allowed',
                         value => $token,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
377 wakaba 1.8
## Checker for the |rel| attribute (a set of space-separated tokens
## whose allowed values are defined by the section on link types).
##
## Arguments:
##   $a_or_area     - index into each link type's |effect| array;
##                    also used as a boolean when deciding how the
##                    |href| URI is classified.
##   $todo          - hash reference; |has_hyperlink_link_type| is
##                    set to 1 if a hyperlink link type is found.
##   $self, $attr   - the checker object and the attribute node.
##   $item          - unused here.
##   $element_state - hash reference; receives |uri_info| type flags
##                    and |link_rel| (a reference to the token set).
##
## Link type definitions are read from |$LinkType|, which is loaded by
## |Whatpm::_LinkTypeList|.
##
## NOTE: Case sensitive match (since HTML5 spec does not say link
## types are case-insensitive and it says "The value should not
## be confusingly similar to any other defined value (e.g.
## differing only in case).").
## NOTE: Though there is no explicit "MUST NOT" for undefined values,
## "MAY"s and "only ... MAY" restrict non-standard non-registered
## values to be used conformingly.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Reports an error of the given type for a token.  The third
  ## argument names the entry of |$self->{level}| to use.
  my $report = sub {
    my ($type, $token, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $token,
                       level => $self->{level}->{$level_name});
  };

  ## Step 1: Collect the distinct tokens.
  ## NOTE: <link rel="up index"><link rel="up up index"> is not an
  ## error, so repeated "up" tokens are not reported.
  my %word;
  for my $token (grep {length $_}
                 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $report->('duplicate token', $token, 'must');
    }
  }

  ## Step 2: Check each distinct token against the link type list.
  require Whatpm::_LinkTypeList;
  our $LinkType;
  my ($is_hyperlink, $is_resource);
  for my $token (keys %word) {
    my $def = $LinkType->{$token};
    unless (defined $def) {
      $report->('unknown link type', $token, 'uncertain');
      next;
    }

    ## What the link type does in this context, if it is allowed here.
    my $effect = $def->{effect}->[$a_or_area];

    my $status = $def->{status};
    if ($status eq 'accepted' or $status eq 'proposal') {
      $report->('link type:proposed', $token, 'should')
          if $status eq 'proposal';
      $report->('link type:bad context', $token, 'must')
          unless defined $effect;
    } else { # rejected or synonym
      $report->('link type:non-conforming', $token, 'must');
    }

    ## Link types marked |unique| are tracked in
    ## |$self->{has_link_type}| across calls.
    if ($def->{unique}) {
      if ($self->{has_link_type}->{$token}) {
        $report->('link type:duplicate', $token, 'must');
      } else {
        $self->{has_link_type}->{$token} = 1;
      }
    }

    ## "alternate" is classified after the loop, since its effect
    ## depends on whether "stylesheet" is also specified.
    if (defined $effect and $token ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};

  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback
  ## link, which is again impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than
  ## predefined 4.

  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  ## Step 3: Record the results.
  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1
      if $is_hyperlink or $a_or_area;
  $element_state->{uri_info}->{href}->{type}->{resource} = 1
      if $is_resource and not $a_or_area;

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
493 wakaba 1.20
494     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
495 wakaba 1.1
## Checker for an attribute whose value is a URI (or IRI) reference.
##
## Arguments: ($self, $attr, $item, $element_state).  |$item| is
## unused here.
##
## Effects:
##   - The value is checked by |Whatpm::URIChecker|; errors are
##     forwarded to |$self->{onerror}| with |node => $attr| appended.
##   - |$self->{has_uri_attr}| is set to 1.
##   - |$element_state->{uri_info}->{<attribute name>}->{node}| is set
##     to the attribute, and that |uri_info| entry is appended to
##     |$self->{return}->{uri}->{<value>}|.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an
  ## absolute reference with optional fragment identifier.)
  my $value = $attr->value;

  ## The error level table is the third argument of
  ## |check_iri_reference|, as in the other callers in this file.  It
  ## used to be written after the closing parenthesis, where it was
  ## evaluated and discarded instead of being passed.
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
512    
## Checker for an attribute whose value is a space-separated list of
## one or more URIs (or IRIs).
##
## Arguments: ($self, $attr).
##
## Each item is checked by |Whatpm::URIChecker|; errors are forwarded
## to |$self->{onerror}| together with the item (|value|), the
## attribute (|node|) and the item's zero-based position (|index|).
## Each item is also recorded in |$self->{return}->{uri}|, typed by
## the attribute name, and |$self->{has_uri_attr}| is set to 1.
##
## ISSUE: Relative references? (especially, in profile="")
## ISSUE: Leading or trailing white spaces are conformant?
## ISSUE: A sequence of white space characters are conformant?
## ISSUE: A zero-length string is conformant? (It does contain a
## relative reference, i.e. same as base URI.)
## ISSUE: What is "space"?
## NOTE: Duplication seems not an error.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## How the URIs of each supported attribute are used.
  my %type_for = (
    ping => 'action',
    profile => 'namespace',
    archive => 'resource',
  );
  my $type = $type_for{$attr->name};

  my $index = 0;
  my @uris = split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
  for my $value (@uris) {
    my $forward_error = sub {
      $self->{onerror}->(value => $value, @_, node => $attr, index => $index);
    };
    Whatpm::URIChecker->check_iri_reference
        ($value, $forward_error, $self->{level});

    ## TODO: absolute
    my $entries = $self->{return}->{uri}->{$value} ||= [];
    push @$entries, {node => $attr, type => {$type => 1}};

    $index++;
  }

  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
541    
## Unanchored pattern for an e-mail address of the form
## |dot-atom "@" dot-atom|, where a dot-atom is one or more runs of
## |atext| characters separated by single dots.  Callers that need a
## whole-string match must add |\A| and |\z| themselves.
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/;
  qr/$dot_atom\@$dot_atom/;
};
548    
## Returns a checker for a date and/or time attribute (e.g. a valid
## global date and time).
##
## Argument: $type - the suffix of the |Message::Date| parser method
## to use; the method called is |parse_$type|.
##
## The returned checker takes ($self, $attr, $item, $element_state)
## and stores its result in
## |$element_state->{date_value}->{<attribute name>}|:
##   - the parser's return value, if it is true;
##   - otherwise the empty string, if the parser reported any error
##     other than 'date value not supported';
##   - otherwise |undef|.
## Parser errors other than 'date value not supported' are forwarded
## to |$self->{onerror}| with |node => $attr| appended.
my $GetDateTimeAttrChecker = sub {
  my ($type) = @_;
  my $parse_method = 'parse_' . $type;

  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    require Message::Date;
    my $parser = Message::Date->new;
    $parser->{level} = $self->{level};

    ## Stays |undef| unless a reportable error occurs.
    my $value_on_error;
    $parser->{onerror} = sub {
      my %opt = @_;
      unless ($opt{type} eq 'date value not supported') {
        $self->{onerror}->(%opt, node => $attr);
        $value_on_error = '';
      }
    };

    my $parsed = $parser->$parse_method ($attr->value);
    $element_state->{date_value}->{$attr->name} = $parsed || $value_on_error;
  };
}; # $GetDateTimeAttrChecker
573 wakaba 1.1
## Checker for an integer attribute: an optional "-" followed by one
## or more ASCII digits.  Takes ($self, $attr) and reports
## 'integer:syntax error' at the "must" level for any other value.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A-?[0-9]+\z/) {
    $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLIntegerAttrChecker
582    
## Returns a checker for a non-negative integer attribute.
##
## Argument: $range_check - a code reference that receives the
## numeric value and returns true if the value is within range.
##
## The returned checker takes ($self, $attr) and reports, at the
## "must" level, 'nninteger:syntax error' if the value is not a
## sequence of one or more ASCII digits, or 'nninteger:out of range'
## if $range_check rejects the value.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;

    my $error_type;
    if ($value !~ /\A[0-9]+\z/) {
      $error_type = 'nninteger:syntax error';
    } elsif (not $range_check->($value + 0)) {
      $error_type = 'nninteger:out of range';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $attr,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
600    
## Returns a checker for a floating point number attribute.
##
## Argument: $range_check - a code reference that receives the
## numeric value and returns true if the value is within range.
##
## The returned checker takes ($self, $attr, $item, $element_state).
## Accepted syntax is an optional "-", then either digits with an
## optional "." and fraction digits, or a "." followed by digits.
## On success the numeric value is stored in
## |$element_state->{number_value}->{<attribute name>}|.  Otherwise
## 'float:syntax error' or 'float:out of range' is reported at the
## "must" level.
##
## TODO: scientific notation
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;

    my $is_float = ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                    $value =~ /\A-?\.[0-9]+\z/);
    unless ($is_float) {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error',
                         level => $self->{level}->{must});
      return;
    }

    unless ($range_check->($value + 0)) {
      $self->{onerror}->(node => $attr, type => 'float:out of range',
                         level => $self->{level}->{must});
      return;
    }

    ## TODO: parse algorithm
    $element_state->{number_value}->{$attr->name} = $value + 0;
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
624    
## Checker for the |step| attribute.  Takes ($self, $attr).
##
## NOTE: A valid floating point number (> 0), or ASCII
## case-insensitive "any".
##
## Reports, at the "must" level, 'float:out of range' for a number
## that is not greater than zero and 'float:syntax error' for a value
## that is neither a number nor "any".
##
## TODO: scientific
my $StepAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;

  ## "any", in any combination of ASCII letter case.
  return if $value =~ /\A[Aa][Nn][Yy]\z/;

  my $is_float = ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                  $value =~ /\A-?\.[0-9]+\z/);
  my $error_type = !$is_float ? 'float:syntax error'
                 : $value > 0 ? undef
                 :              'float:out of range';
  return unless defined $error_type;

  $self->{onerror}->(node => $attr,
                     type => $error_type,
                     level => $self->{level}->{must});
}; # $StepAttrChecker
647    
## Checker for an HTML4 |%Length;| attribute: one or more ASCII
## digits, optionally followed by "%".  Takes ($self, $attr) and
## reports 'length:syntax error' at the "must" level for any other
## value.
##
## NOTE: HTML4 definition is too vague - it does not define the syntax
## of percentage value at all (!).
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A[0-9]+%?\z/) {
    $self->{onerror}->(node => $attr, type => 'length:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLLengthAttrChecker
660    
## One or more printable ASCII characters other than the space and
## ()<>@,;:\"/[]?= .
## NOTE(review): This set matches the RFC 2045 |token| production -
## confirm that this is the intended reference.
my $MIMEToken = qr{[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+};

## A media type name or subtype name of 1 to 127 characters (RFC 4288).
my $TypeOrSubtype = qr{[A-Za-z0-9!#\$&.+^_-]{1,127}};

## An Internet media type without parameters.  The type is captured
## as $1 and the subtype as $2.
my $IMTNoParameter = qr{($TypeOrSubtype)/($TypeOrSubtype)};
664    
## Checker for "A valid MIME type, optionally with parameters.
## [RFC 2046]".  Takes ($self, $attr).
##
## If the value has the form |type "/" subtype *(";" name "=" value)|
## (linear white space is allowed around the tokens), the type, the
## subtype and the unquoted parameter name/value pairs are passed to
## |Whatpm::IMTChecker|, whose errors are forwarded to
## |$self->{onerror}| with |node => $attr| appended.  Otherwise
## 'IMT:syntax error' is reported at the "must" level.
##
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;

  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
  ## ISSUE: RFC 2231 extension? Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;

  unless ($value =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#) {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                       level => $self->{level}->{must});
    return;
  }

  ## (type, subtype, name1 => value1, name2 => value2, ...)
  my @type = ($1, $2);
  my $param = $3;
  while ($param =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
    my ($name, $token, $quoted) = ($1, $2, $3);
    if (defined $token) {
      push @type, $name => $token;
      next;
    }

    ## Quoted string: resolve the backslash escapes, then strip the
    ## enclosing quotation marks.
    $quoted =~ s/\\(.)/$1/gs;
    push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
  }

  require Whatpm::IMTChecker;
  my $ic = Whatpm::IMTChecker->new;
  $ic->{level} = $self->{level};
  $ic->check_imt (sub {
    $self->{onerror}->(@_, node => $attr);
  }, @type);
}; # $HTMLIMTAttrChecker
701    
## Checker for an attribute whose value is a language tag.  Takes
## ($self, $attr).  The value is checked as an RFC 3066 language tag
## by |Whatpm::LangTag|; errors are forwarded to |$self->{onerror}|
## with |node => $attr| appended.
##
## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
## ISSUE: RFC 4646 (3066bis)?
## TODO: testdata
my $HTMLLanguageTagAttrChecker = sub {
  my ($self, $attr) = @_;

  my $tag = $attr->value;
  my $forward_error = sub {
    $self->{onerror}->(@_, node => $attr);
  };

  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag
      ($tag, $forward_error, $self->{level});
}; # $HTMLLanguageTagAttrChecker
715    
## Checker for "A valid media query [MQ]".  Takes ($self, $attr).
## Media queries are not validated; every value is reported as
## 'media query' at the "uncertain" level.
##
## ISSUE: What is "a valid media query"?
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my $uncertain = $self->{level}->{uncertain};
  $self->{onerror}->(node => $attr, type => 'media query',
                     level => $uncertain);
}; # $HTMLMQAttrChecker
724    
## Checker for an event handler content attribute.  Takes
## ($self, $attr).  The script is not validated; every value is
## reported as 'event handler' at the "uncertain" level.
##
## TODO: MUST contain valid ECMAScript code matching the
## ECMAScript |FunctionBody| production. [ECMA262]
## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
## ISSUE: Automatic semicolon insertion does not apply?
## ISSUE: Other script languages?
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my $uncertain = $self->{level}->{uncertain};
  $self->{onerror}->(node => $attr, type => 'event handler',
                     level => $uncertain);
}; # $HTMLEventHandlerAttrChecker
736    
## Checker for the |form| attribute.  Takes ($self, $attr).
##
## NOTE: MUST be the ID of a |form| element.
##
## Nothing is validated here; an entry
## ['form', <attribute value>, <attribute>] is appended to
## |$self->{idref}|.
##
## ISSUE: <form id=""><input form=""> (empty ID)?
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;
  my $idref = ['form', $attr->value, $attr];
  push @{$self->{idref}}, $idref;
}; # $HTMLFormAttrChecker
747    
## Checker for the |list| attribute.  Takes ($self, $attr).
##
## NOTE: MUST be the ID of a |datalist| element.
##
## Nothing is validated here; an entry
## ['datalist', <attribute value>, <attribute>] is appended to
## |$self->{idref}|.
##
## TODO: Warn violation to control-dependent restrictions. For
## example, |<input type=url maxlength=10 list=a> <datalist
## id=a><option value=nonurlandtoolong></datalist>| should be
## warned.
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;
  my $idref = ['datalist', $attr->value, $attr];
  push @{$self->{idref}}, $idref;
}; # $ListAttrChecker
760    
## Checker for the |pattern| attribute.  Takes ($self, $attr).
## The value is not checked here; it is handed to |$self->{onsubdoc}|
## as a character string subdocument of type 'text/x-regexp-js'.
##
## ISSUE: "value must match the Pattern production of ECMA 262's
## grammar" - no additional constraints (e.g. {n,m} then n>=m).
##
## TODO: Warn if @value does not match @pattern.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;
  my $subdoc = {
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-regexp-js',
    is_char_string => 1,
  };
  $self->{onsubdoc}->($subdoc);
}; # $PatternAttrChecker
773    
## Checker for the |accept| attribute: a comma-separated list of
## tokens, compared ASCII case-insensitively.  Each token must be
## unique and must be |audio/*|, |video/*|, |image/*|, or something
## matching |$IMTNoParameter|; the latter is further checked by
## |Whatpm::IMTChecker|.
my $AcceptAttrChecker = sub {
  my ($checker, $accept_attr) = @_;

  (my $folded = $accept_attr->value) =~ tr/A-Z/a-z/; ## ASCII case-insensitive

  ## An empty attribute value is treated as a single empty token.
  my @tokens = ('');
  @tokens = split /,/, $folded, -1 if length $folded;

  my %seen;
  TOKEN: for my $token (@tokens) {
    if ($seen{$token}++) {
      $checker->{onerror}->(node => $accept_attr,
                            type => 'duplicate token',
                            value => $token,
                            level => $checker->{level}->{must});
      next TOKEN;
    }

    ## The three wildcard forms are always acceptable.
    next TOKEN
        if $token eq 'audio/*' or $token eq 'video/*' or $token eq 'image/*';

    if ($token =~ m[\A$IMTNoParameter\z]) {
      ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
      ## define its own syntax citing RFC 4288.

      ## NOTE: Parameters not allowed.
      require Whatpm::IMTChecker;
      my $imt_checker = Whatpm::IMTChecker->new;
      $imt_checker->{level} = $checker->{level};
      ## $1 and $2 are the capture groups of |$IMTNoParameter|.
      $imt_checker->check_imt (sub {
        $checker->{onerror}->(@_, node => $accept_attr);
      }, $1, $2);
    } else {
      $checker->{onerror}->(node => $accept_attr,
                            type => 'IMTnp:syntax error', ## TODOC: type
                            value => $token,
                            level => $checker->{level}->{must});
    }
  } # TOKEN
}; # $AcceptAttrChecker
812    
## Checker for the |name| attribute of form controls: the value must
## not be the empty string.
my $FormControlNameAttrChecker = sub {
  my ($checker, $name_attr) = @_;

  ## NOTE: No uniqueness constraint.
  return if length $name_attr->value;

  $checker->{onerror}->(node => $name_attr,
                        type => 'empty control name', ## TODOC: type
                        level => $checker->{level}->{must});
}; # $FormControlNameAttrChecker
824    
## Checker for the |autofocus| attribute.  Runs the boolean attribute
## checker first, then reports every |autofocus| attribute after the
## first one seen by this checker context (tracked in
## |$checker->{has_autofocus}|).
my $AutofocusAttrChecker = sub {
  my ($checker, $autofocus_attr) = @_;

  my $check_boolean = $GetHTMLBooleanAttrChecker->('autofocus');
  $check_boolean->(@_);

  $checker->{onerror}->(node => $autofocus_attr,
                        type => 'duplicate autofocus', ## TODOC: type
                        level => $checker->{level}->{must})
      if $checker->{has_autofocus};
  $checker->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
837    
## Checker for the |usemap| attribute.  The value MUST be a valid
## hash-name reference to a |map| element, i.e. it must begin with
## "#".  The name after the "#" is queued on |$checker->{usemap}| as
## |[name, attribute node]|.
my $HTMLUsemapAttrChecker = sub {
  my ($checker, $usemap_attr) = @_;

  my $map_name = $usemap_attr->value;
  unless ($map_name =~ s/^#//) {
    $checker->{onerror}->(node => $usemap_attr,
                          type => 'hashref:syntax error',
                          level => $checker->{level}->{must});
    return;
  }

  ## NOTE: |usemap="#"| is conforming, though it identifies no |map|
  ## element according to the "rules for parsing a hash-name
  ## reference" algorithm.  The document is non-conforming anyway,
  ## since |<map name="">| (empty name) is non-conforming.

  ## NOTE: Space characters in hash-name references are conforming.

  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported.
  push @{$checker->{usemap}}, [$map_name => $usemap_attr];
}; # $HTMLUsemapAttrChecker
855    
## Valid browsing context name: a non-empty string that does not
## begin with "_".
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($checker, $name_attr) = @_;
  my $name = $name_attr->value;

  if (not length $name) {
    $checker->{onerror}->(node => $name_attr, type => 'window name:empty',
                          level => $checker->{level}->{must});
  } elsif ($name =~ /^_/) {
    ## Names beginning with "_" are reserved.
    $checker->{onerror}->(node => $name_attr, type => 'window name:reserved',
                          level => $checker->{level}->{must},
                          value => $name);
  }
}; # $HTMLBrowsingContextNameAttrChecker
871    
## Valid browsing context name or keyword: a non-empty string that
## either does not begin with "_" or is one of |_blank|, |_self|,
## |_parent|, and |_top| (compared after lowercasing).
my $HTMLTargetAttrChecker = sub {
  my ($checker, $target_attr) = @_;
  my $name = $target_attr->value;

  if (not length $name) {
    $checker->{onerror}->(node => $target_attr, type => 'window name:empty',
                          level => $checker->{level}->{must});
  } elsif ($name =~ /^_/) {
    my $keyword = lc $name; ## ISSUE: ASCII case-insensitive?
    my %is_keyword = map { ($_ => 1) } qw/_blank _self _parent _top/;
    unless ($is_keyword{$keyword}) {
      ## The lowercased value is what gets reported.
      $checker->{onerror}->(node => $target_attr,
                            type => 'window name:reserved',
                            level => $checker->{level}->{must},
                            value => $keyword);
    }
  }
}; # $HTMLTargetAttrChecker
893    
## Checker for attributes whose value is a group of selectors.  The
## value is parsed by |Whatpm::CSS::SelectorsParser| with the
## pseudo-classes and pseudo-elements listed below enabled; parse
## errors are forwarded to |onerror| with the attribute as the node.
my $HTMLSelectorsAttrChecker = sub {
  my ($checker, $selectors_attr) = @_;

  ## ISSUE: Namespace resolution?

  my $selectors = $selectors_attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  @{$parser->{pseudo_class}}{@pseudo_classes} = (1) x @pseudo_classes;

  my @pseudo_elements = qw/
    after before first-letter first-line
  /;
  @{$parser->{pseudo_element}}{@pseudo_elements} = (1) x @pseudo_elements;

  $parser->{level} = $checker->{level};
  $parser->{onerror} = sub {
    $checker->{onerror}->(@_, node => $selectors_attr);
  };
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
921    
## Checks a single charset name.
##
## NOTE: This code is used for |charset=""| attributes, |charset=|
## portion of the |content=""| attributes, and |accept-charset=""|
## attributes.
##
##   $name                 - the charset name (lowercased before use)
##   $checker              - checker context (|onerror|, |level|)
##   $attr                 - the attribute node used in reports
##   $require_ascii_compat - if true, a known charset that is not
##                           ASCII-compatible is reported
##
## Returns the list |($info, $name)|: the entry found in
## |$Message::Charset::Info::IANACharset| (false if none) and the
## lowercased name.
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($name, $checker, $attr, $require_ascii_compat) = @_;

  ## NOTE: Though the case-sensitivility of |charset| attribute value
  ## is not explicitly spelled in the HTML5 spec, the Character Set
  ## registry of IANA, which is referenced from HTML5 spec, says that
  ## charset name is case-insensitive.
  $name =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

  require Message::Charset::Info;
  my $info = $Message::Charset::Info::IANACharset->{$name};

  ## Every report made here has the same shape; only the error type
  ## and the level key differ.
  my $report = sub {
    my ($type, $level_key) = @_;
    $checker->{onerror}->(node => $attr,
                          type => $type,
                          value => $name,
                          level => $checker->{level}->{$level_key});
  };

  ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
  ## Syntactically valid and registered?  What about x-charset names?
  $report->('charset:syntax error', 'must')
      unless Message::Charset::Info::is_syntactically_valid_iana_charset_name
          ($name);

  ## Error type used when the name is not a registered name.
  my $unregistered_type = $name =~ /^x-/
      ? 'charset:private' : 'charset:not registered';

  unless ($info) {
    ## TODO: non-preferred-name error for these cases.

    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    $report->($unregistered_type, 'good');
    return ($info, $name);
  }

  ## ISSUE: What is "the preferred name for that encoding" (for a charset
  ## with no "preferred MIME name" label)?
  my $name_flags = $info->{iana_names}->{$name} || 0;

  my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
  $report->('charset:not preferred', 'must')
      unless ($name_flags & $preferred) == $preferred;

  my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();
  $report->($unregistered_type, 'good')
      unless ($name_flags & $registered) == $registered;

  if ($require_ascii_compat) {
    my $ascii_compat
        = Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ();
    $report->('charset:not ascii compat', 'must')
        unless $info->{category} & $ascii_compat;
  }

  return ($info, $name);
}; # $HTMLCharsetChecker
1010    
## NOTE: "An ordered set of space-separated tokens" where "each token
## MUST be the preferred name of an ASCII-compatible character
## encoding".
##
## Each non-empty token is passed to |$HTMLCharsetChecker| with the
## ASCII-compatibility requirement turned on.
my $HTMLCharsetsAttrChecker = sub {
  my ($checker, $charsets_attr) = @_;

  ## ISSUE: "ordered set of space-separated tokens" is not defined.

  ## XXX
  ## ISSUE: Uniqueness is not enforced.

  ## ISSUE: Shift_JIS is ASCII-compatible?  What about ISO-2022-JP?

  for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $charsets_attr->value) {
    ## A leading space character yields an empty first field.
    next unless length $token;
    $HTMLCharsetChecker->($token, $checker, $charsets_attr, 1);
  }
}; # $HTMLCharsetsAttrChecker
1030    
## Checker for HTML4 color values.
##
## NOTE: HTML4 "color" or |%Color;|
##
## Accepts "#" followed by one or more hexadecimal digits, or one of
## the sixteen color names, case-insensitively.  Anything else is
## reported at the |html4_fact| level.
my $HTMLColorAttrChecker = sub {
  my ($checker, $color_attr) = @_;

  my $color = $color_attr->value;
  my $is_color = $color =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i;

  ## TODO: HTML4 has some guideline on usage of color.

  if (not $is_color) {
    $checker->{onerror}->(node => $color_attr, type => 'color:syntax error',
                          level => $checker->{level}->{html4_fact});
  }
}; # $HTMLColorAttrChecker
1045    
## Checker shared by the |ref| and |template| attributes.
##
## The value is first checked as a URI by |$HTMLURIAttrChecker|.  A
## |ref| attribute is reported when its owner element has no
## |template| attribute.  When the value resolves to the document
## itself, a fragment identifier (if any) is queued on
## |$checker->{ref}| or |$checker->{template}|; a |template|
## attribute without a fragment identifier is reported unless the
## document element is an HTML |datatemplate| element.
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($checker, $attr) = @_;
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $attr->name;

  if ($attr_name eq 'ref'
      and not $attr->owner_element->has_attribute_ns (undef, 'template')) {
    $checker->{onerror}->(node => $attr, type => 'attribute not allowed',
                          level => $checker->{level}->{must});
  }

  require Message::URL;
  my $doc = $attr->owner_document;
  my $doc_uri = $doc->document_uri;
  my $target_uri = Message::URL->new_abs ($attr->value, $doc_uri);
  my $target_without_fragment = $target_uri->clone;
  $target_without_fragment->uri_fragment (undef);

  ## Does the reference, ignoring its fragment, point at this
  ## document?  A document without a URI only matches an empty one.
  my $is_same_document = defined $doc_uri
      ? $doc_uri eq $target_without_fragment
      : $target_without_fragment eq '';

  unless ($is_same_document) {
    ## TODO: An external document is referenced.
    ## The document MUST be an HTML or XML document.
    ## If there is a fragment identifier, it MUST point a part of the doc.
    ## If the attribute is |template|, the pointed part MUST be a
    ## |datatemplate| element.
    ## If no fragment identifier is specified, the root element MUST be
    ## a |datatemplate| element when the attribute is |template|.
    return;
  }

  my $fragid = $target_uri->uri_fragment;
  if (defined $fragid) {
    push @{$checker->{$attr_name}}, [$fragid => $attr];
  } elsif ($attr_name eq 'template') {
    ## The whole document is referenced as the template, so its
    ## document element has to be an HTML |datatemplate| element.
    my $docel = $doc->document_element;
    my $docel_nsuri = $docel ? $docel->namespace_uri : undef;
    my $is_datatemplate = ($docel
                           and defined $docel_nsuri
                           and $docel_nsuri eq $HTML_NS
                           and $docel->manakai_local_name eq 'datatemplate');
    unless ($is_datatemplate) {
      $checker->{onerror}->(node => $attr, type => 'template:not template',
                            level => $checker->{level}->{must});
    }
  }
}; # $HTMLRefOrTemplateAttrChecker
1098    
## Checker for the |repeat-min|, |repeat-max|, and |repeat-start|
## attributes.
##
##   $self - checker context (reads |onerror| and |level|)
##   $attr - the attribute node being checked
##
## An attribute that is itself in a namespace is reported as
## 'attribute not allowed' when its owner element is in the HTML
## namespace.  NOTE(review): presumably HTML elements are expected to
## use the null-namespace form of these attributes - confirm against
## Web Forms 2.0.  The value is then checked as a non-negative
## integer.
my $HTMLRepeatIndexAttrChecker = sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## Both tests are required: with |or| every namespaced owner
    ## element was reported, and |undef| was compared with |eq| for
    ## owner elements in no namespace.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  ## Any non-negative integer is accepted.
  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1113    
## Checkers for the attributes that can be specified on any HTML
## element, keyed by attribute local name.  Each checker is invoked
## as |$checker->($self, $attr, $item, $element_state)| and reports
## problems via |$self->{onerror}|.
my $HTMLAttrChecker = {
  accesskey => sub {
    my ($self, $attr) = @_;

    ## "Ordered set of unique space-separated tokens"

    my %keys;
    my @keys = grep {length} split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

    for my $key (@keys) {
      unless ($keys{$key}) {
        $keys{$key} = 1;
        ## Each token has to be exactly one character long.
        if (length $key != 1) {
          $self->{onerror}->(node => $attr, type => 'char:syntax error',
                             value => $key,
                             level => $self->{level}->{must});
        }
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $key,
                           level => $self->{level}->{must});
      }
    }
  }, # accesskey

  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]

  ## Records the ID in |$self->{id}| (ID => array of attribute nodes)
  ## and, for its first occurrence, the ID type in |$self->{id_type}|.
  id => sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    if (length $value > 0) {
      if ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID',
                           level => $self->{level}->{must});
        push @{$self->{id}->{$value}}, $attr;
      } else {
        $self->{id}->{$value} = [$attr];
        $self->{id_type}->{$value} = $element_state->{id_type} || '';
      }
      if ($value =~ /[\x09\x0A\x0C\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID',
                           level => $self->{level}->{must});
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value',
                         level => $self->{level}->{must});
    }
  },

  title => sub {}, ## NOTE: No conformance criteria

  ## The empty string is allowed; any other value is checked as an
  ## RFC 3066 language tag.
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value eq '') {
      #
    } else {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        $self->{onerror}->(@_, node => $attr);
      }, $self->{level});
    }
    ## ISSUE: RFC 4646 (3066bis)?

    ## TODO: test data

    ## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
    ## non-conforming.  Such errors are detected by the checkers of
    ## |{}xml:lang| and |{xml}:lang| attributes.
  },

  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),

  ## Records each distinct class name in |$self->{return}->{class}|
  ## and reports tokens repeated within the same attribute.
  class => sub {
    my ($self, $attr) = @_;

    ## NOTE: "Unordered set of unique space-separated tokens".

    my %word;
    for my $word (grep {length $_}
                  split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      unless ($word{$word}) {
        $word{$word} = 1;
        push @{$self->{return}->{class}->{$word}||=[]}, $attr;
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $word,
                           level => $self->{level}->{must});
      }
    }
  },

  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),

  ## Queues a reference to a |menu| element on |$self->{idref}|.
  contextmenu => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    push @{$self->{idref}}, ['menu', $value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },

  hidden => $GetHTMLBooleanAttrChecker->('hidden'),
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ref => $HTMLRefOrTemplateAttrChecker,

  ## Allowed only while |$self->{flag}->{in_rule}| is set, and even
  ## then not on an HTML |nest| element, nor on an HTML |rule| element
  ## whose |$element_state->{in_rule_original}| is false.
  registrationmark => sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## NOTE: Any value is conforming.

    if ($self->{flag}->{in_rule}) {
      my $el = $attr->owner_element;
      my $ln = $el->manakai_local_name;
      if ($ln eq 'nest' or
          ($ln eq 'rule' and not $element_state->{in_rule_original})) {
        my $nsuri = $el->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    } else {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## The value must be |template| or an integer (optionally negative).
  repeat => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## A namespaced |repeat| attribute is reported only when the
      ## owner element is in the HTML namespace.  Both tests are
      ## required: with |or| every namespaced owner element was
      ## reported, and |undef| was compared with |eq| otherwise.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    my $value = $attr->value;
    if ($value eq 'template') {
      #
    } elsif ($value =~ /\A-?[0-9]+\z/) {
      #
    } else {
      $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "Repetition templates may occur anywhere."  Does that mean
    ## that the attribute MAY be specified to any element, or that the
    ## element with that attribute (i.e. a repetition template) can be
    ## inserted anywhere in a document tree?
  },

  'repeat-min' => $HTMLRepeatIndexAttrChecker,
  'repeat-max' => $HTMLRepeatIndexAttrChecker,
  'repeat-start' => $HTMLRepeatIndexAttrChecker,

  'repeat-template' => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## Same namespace rule as for |repeat|: report only when the
      ## owner element is in the HTML namespace.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## ISSUE: This attribute has no conformance requirement.
    ## ISSUE: Repetition blocks MAY have this attribute.  Then, is the
    ## attribute allowed on an element that is not a repetition block?
  },

  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]

  ## The value is handed to |onsubdoc| as inline CSS.
  style => sub {
    my ($self, $attr) = @_;

    $self->{onsubdoc}->({s => $attr->value,
                         container_node => $attr,
                         media_type => 'text/x-css-inline',
                         is_char_string => 1});

    ## NOTE: "... MUST still be comprehensible and usable if those
    ## attributes were removed" is a semantic requirement, it cannot
    ## be tested.
  },

  tabindex => $HTMLIntegerAttrChecker,
  template => $HTMLRefOrTemplateAttrChecker,

  ## The |xml:lang| attribute in no namespace.  It must be accompanied
  ## by a |lang| attribute with the same value (compared ASCII
  ## case-insensitively).
  'xml:lang' => sub {
    my ($self, $attr) = @_;

    if ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(type => 'in HTML:xml:lang',
                         level => $self->{level}->{info},
                         node => $attr);
      ## NOTE: This is not an error, but the attribute will be ignored.
    } else {
      $self->{onerror}->(type => 'in XML:xml:lang',
                         level => $self->{level}->{html5_no_may},
                         node => $attr);
      ## TODO: We need to add test for this error.
    }

    my $lang_attr = $attr->owner_element->get_attribute_node_ns
        (undef, 'lang');
    if ($lang_attr) {
      my $lang_attr_value = $lang_attr->value;
      $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      if ($lang_attr_value ne $value) {
        $self->{onerror}->(type => 'xml:lang ne lang',
                           level => $self->{level}->{must},
                           node => $attr);
      }
    } else {
      $self->{onerror}->(type => 'xml:lang not allowed',
                         level => $self->{level}->{must},
                         node => $attr);
      ## TODO: We need to add test for <x {xml}:lang {}xml:lang>.
    }
  },

  ## The value must be the HTML namespace URI, and the attribute is
  ## reported when the document is not an HTML document.  The value
  ## is also recorded in |$self->{return}->{uri}| as a namespace URI.
  xmlns => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                         level => $self->{level}->{must});
      ## TODO: Should be new "bad namespace" error?
    }
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                         level => $self->{level}->{must});
      ## TODO: Test
    }

    ## TODO: Should be resolved?
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {namespace => 1}};
  },
};
1351    
## ISSUE: Shouldn't the same-origin policy be applied to the datatemplate feature?
1353    
## Feature status of the attributes that can be specified on any HTML
## element, keyed by attribute local name and grouped here by status.
my %HTMLAttrStatus = (
  (map { ($_ => FEATURE_HTML5_WD) }
       qw/class contextmenu dir id lang style title xmlns/),
  (map { ($_ => FEATURE_HTML5_DEFAULT) }
       qw/contenteditable hidden tabindex/),
  (map { ($_ => FEATURE_HTML5_AT_RISK) }
       qw/ref registrationmark template/),
  (map { ($_ => FEATURE_WF2) }
       qw/repeat repeat-max repeat-min repeat-start repeat-template/),
  accesskey => FEATURE_HTML5_FD,
  draggable => FEATURE_HTML5_LC,
  irrelevant => FEATURE_HTML5_DROPPED,
  role => 0,
);
1379    
## Feature status of the common attributes of elements that are
## marked with the M12N (XHTML Modularization 1.0) status, keyed by
## attribute local name and grouped here by status.
my %HTMLM12NCommonAttrStatus = (
  ## RDFa attributes
  (map { ($_ => FEATURE_RDFA_REC) }
       qw/about content datatype href property rel resource rev typeof/),
  instanceof => FEATURE_RDFA_LC_DROPPED,

  ## Core and I18N attributes
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_M12N10_REC) }
       qw/class dir id title/),

  ## Intrinsic event attributes
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
    onclick ondblclick onmousedown onmouseup onmouseover onmousemove
    onmouseout onkeypress onkeydown onkeyup
  /),

  ## NOTE: |FEATURE_XHTMLBASIC11_CR_DEPRECATED| was formerly used here
  ## in place of |FEATURE_XHTMLBASIC11_CR|.
  style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
           FEATURE_M12N10_REC,
);
1410    
## Feature status of the XHTML2 common attributes, keyed by attribute
## local name.  The |xml:id|, |xml:base|, and |xml:lang| attributes of
## the Core, Hypertext, and I18N collections are not listed here.
my %XHTML2CommonAttrStatus = (
  ## Attributes that are also part of HTML5: Core (|class|, |id|,
  ## |title|), Bi-directional (|dir|), and Style (|style|, which is
  ## "strongly discouraged").
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_XHTML2_ED) }
       qw/class id title dir style/),

  ## Attributes that are only part of XHTML2.
  (map { ($_ => FEATURE_XHTML2_ED) } (
    ## Core
    qw/layout/,
    ## Hypertext
    qw/cite href hreflang hrefmedia hreftype nextfocus prevfocus target/,
    ## Edit
    qw/edit datetime/,
    ## Embedding
    qw/encoding src srctype/,
    ## Image Map
    qw/usemap ismap shape coords/,
    ## Media
    qw/media/,
    ## Metadata
    qw/about content datatype instanceof property rel resource rev/,
    ## Role
    qw/role/,
  )),
);
1470    
## Union of |%HTMLM12NCommonAttrStatus| and |%XHTML2CommonAttrStatus|.
## The explicit entries come last so that they replace whatever the
## two merged tables supplied for the same attribute.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  ## RDFa attributes
  (map { ($_ => FEATURE_RDFA_REC | FEATURE_XHTML2_ED) }
       qw/about content datatype property rel resource rev/),
  (map { ($_ => FEATURE_RDFA_REC) } qw/href typeof/),
  instanceof => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,

  ## Core and I18N attributes
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
       qw/class dir id title/),

  ## NOTE: |FEATURE_XHTMLBASIC11_CR_DEPRECATED| was formerly used here
  ## in place of |FEATURE_XHTMLBASIC11_CR|.
  style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
           FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
);
1494    
## Register the event handler content attributes as global HTML
## attributes.  Every attribute of a group gets
## |$HTMLEventHandlerAttrChecker| as its checker and the group's
## feature status (the first member of each group).
for my $group (
  [FEATURE_HTML5_DEFAULT, qw/
    onabort onbeforeunload onblur onchange onclick oncontextmenu
    ondblclick ondrag ondragend ondragenter ondragleave ondragover
    ondragstart ondrop onerror onfocus onkeydown onkeypress
    onkeyup onload onmessage onmousedown onmousemove onmouseout
    onmouseover onmouseup onmousewheel onresize onscroll onselect
    onstorage onsubmit onunload
  /],
  [FEATURE_HTML5_DROPPED, qw/
    ondataunavailable
  /],
) {
  my ($status, @attr_names) = @$group;
  for my $attr_name (@attr_names) {
    $HTMLAttrChecker->{$attr_name} = $HTMLEventHandlerAttrChecker;
    $HTMLAttrStatus{$attr_name} = $status;
  }
}
1513    
## Checkers and feature statuses for attributes that are themselves in
## the HTML namespace.
{
  my $checker_for = ($AttrChecker->{$HTML_NS} ||= {});
  my $status_for = ($AttrStatus->{$HTML_NS} ||= {});

  ## NOTE: Non-standard global attributes in the HTML namespace.
  $checker_for->{''} = sub {}; # no syntactical checks
  $status_for->{''} = 0; # disallowed and not part of any standard

  $status_for->{active} = FEATURE_HTML5_DROPPED;

  ## The Web Forms 2.0 repetition attributes reuse the global
  ## attribute checkers.
  for my $attr_name (qw/repeat repeat-max repeat-min repeat-start
                        repeat-template/) {
    $checker_for->{$attr_name} = $HTMLAttrChecker->{$attr_name};
    $status_for->{$attr_name} = FEATURE_WF2;
  }

  ## RDFa and role attributes
  $status_for->{$_} = FEATURE_RDFA_REC | FEATURE_XHTML2_ED
      for qw/about content datatype property rel resource rev/;
  $status_for->{instanceof} = FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED;
  $status_for->{typeof} = FEATURE_RDFA_REC;
  $status_for->{role} = FEATURE_ROLE_LC;

  ## Other XHTML2 common attributes
  $status_for->{$_} = FEATURE_XHTML2_ED
      for qw/cite coords datetime edit encoding href hreflang hrefmedia
             hreftype ismap layout media nextfocus prevfocus shape src
             srctype style target usemap/;
  $status_for->{$_} = FEATURE_M12N11_LC | FEATURE_XHTML2_ED
      for qw/class dir id title/;

  ## Intrinsic event attributes
  $status_for->{$_} = FEATURE_M12N11_LC
      for qw/onclick ondblclick onmousedown onmouseup onmouseover
             onmousemove onmouseout onkeypress onkeydown onkeyup/;
}
1542    
## Checker for |data-*| attributes.  It performs no check at all.
my $HTMLDatasetAttrChecker = sub {
  ## NOTE: "Authors should ... when the attributes are ignored and
  ## any associated CSS dropped, the page is still usable."  This is
  ## a semantic constraint.
}; # $HTMLDatasetAttrChecker

## Feature status shared by all |data-*| attributes.
my $HTMLDatasetAttrStatus = FEATURE_HTML5_WD;
1550 wakaba 1.73
## Builds the |check_attrs| handler of an HTML element.
##
##   $local_checker_for - hash reference, attribute local name =>
##                        checker, for element-specific attributes
##   $local_status_for  - hash reference, attribute local name =>
##                        feature status, for the element's attributes
##
## The returned closure is invoked as |($self, $item,
## $element_state)|.  For each attribute of |$item->{node}| it picks a
## checker and a feature status, runs the checker (or reports an
## unknown attribute), and passes the status to |_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($local_checker_for, $local_status_for) = @_;

  return sub {
    my ($self, $item, $element_state) = @_;

    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' if not defined $ns;
      my $ln = $attr->manakai_local_name;
      my $in_null_ns = $ns eq '';

      my ($checker, $status);
      if ($in_null_ns) {
        if ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
            $ln !~ /[A-Z]/) {
          ## |data-*| attribute
          ($checker, $status)
              = ($HTMLDatasetAttrChecker, $HTMLDatasetAttrStatus);
        } else {
          ## Element-specific definitions win over the global ones.
          $checker = $local_checker_for->{$ln} || $HTMLAttrChecker->{$ln};
          $status = $local_status_for->{$ln};
        }
      }

      ## Fall back to the per-namespace tables, whose |''| entry is
      ## the default for the namespace.
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif (not $in_null_ns or $local_status_for->{$ln}) {
        ## Nothing is reported here for a null-namespace attribute
        ## that has no element-specific status either.
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in
        ## the spec
      }

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1592    
## Base set of callbacks shared by the HTML element definitions: the
## generic |AnyChecker| callbacks, with |check_start| and |check_attrs|
## replaced.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## Mark the |template| and |ref| attributes as resource references.
    $state->{uri_info}->{template}->{type}->{resource} = 1;
    $state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  ## No element-specific attribute checkers; statuses come from
  ## |%HTMLAttrStatus|.
  check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
);
1603    
## Callbacks for elements whose content model is empty: any child
## element not listed in |plus_elements|, and any significant text, is
## reported as an error.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} &&
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Explicitly excluded in the current context.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not $self->{plus_elements}->{$ns}->{$ln}) {
      ## Not explicitly allowed either, so it violates the empty
      ## content model.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:empty',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    return unless $is_significant;
    $self->{onerror}->(node => $text_node,
                       type => 'character not allowed:empty',
                       level => $self->{level}->{must});
  },
);
1631    
## Callbacks for elements whose content model is text only: child
## elements are errors unless they are listed in |plus_elements|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} &&
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Explicitly excluded in the current context.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not $self->{plus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $child, type => 'element not allowed:text',
                         level => $self->{level}->{must});
    }
  },
);
1650    
## Callbacks for elements whose content model is flow content.  A
## |style| child is accepted only if it has the |scoped| attribute and
## precedes all other (non-transparent) content of the element.
my %HTMLFlowContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} &&
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Explicitly excluded in the current context.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly allowed in the current context.
    } elsif ($ns eq $HTML_NS && $ln eq 'style') {
      unless (not $state->{has_non_style} and
              $child->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child,
                           type => 'element not allowed:flow style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLFlowContent->{$ns}->{$ln}) {
      ## A transparent child does not by itself end the run of leading
      ## |style| elements.
      if (not $is_transparent) {
        $state->{has_non_style} = 1;
      }
    } else {
      $state->{has_non_style} = 1;
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:flow',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    $state->{has_non_style} = 1 if $is_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    ## NOTE: A modified copy of the code below is in |datagrid| checker.
    if ($state->{has_significant}) {
      ## Propagate the "has significant content" mark to the parent.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
);
1699    
## Callbacks for elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} &&
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Explicitly excluded in the current context.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$ns}->{$ln} ||
             $HTMLPhrasingContent->{$ns}->{$ln}) {
      ## Explicitly allowed, or phrasing content: nothing to report.
    } else {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:phrasing',
                         level => $self->{level}->{must});
    }
  },
  ## The end-of-element check is shared with the flow content checker.
  check_end => $HTMLFlowContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between flow and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
1725    
## Transparent elements currently use a plain copy of the flow content
## callbacks.
my %HTMLTransparentChecker = (%HTMLFlowContentChecker);
## ISSUE: Significant content rule should be applied to transparent element
## with parent?

our $Element;
our $ElementDefault;

## Fallback definition for HTML-namespace elements that have no entry
## of their own.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
1736    
## The |html| element: the root element, whose children must be exactly
## one |head| followed by one |body|.  Progress is tracked in
## |$element_state->{phase}|: 'before head', 'after head', 'after body'.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    version => sub {
      ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
      ## Though DTDs of various versions of HTML define the attribute
      ## as |#FIXED|, this conformance checker does no check for
      ## the attribute value, since what kind of check should be done
      ## is unknown.
    },
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    manifest => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;
    $state->{phase} = 'before head';

    ## |manifest|, |template| and |ref| are resource references.
    $state->{uri_info}->{manifest}->{type}->{resource} = 1;
    $state->{uri_info}->{template}->{type}->{resource} = 1;
    $state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} &&
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Explicitly excluded in the current context.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly allowed in the current context.
    } elsif ($state->{phase} eq 'before head') {
      ## Local name if the child is an HTML element, '' otherwise.
      my $html_ln = $ns eq $HTML_NS ? $ln : '';
      if ($html_ln eq 'head') {
        $state->{phase} = 'after head';
      } elsif ($html_ln eq 'body') {
        ## |body| without a preceding |head|.
        $self->{onerror}->(node => $child,
                           type => 'ps element missing',
                           text => 'head',
                           level => $self->{level}->{must});
        $state->{phase} = 'after body';
      } else {
        $self->{onerror}->(node => $child,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($state->{phase} eq 'after head') {
      if ($ns eq $HTML_NS && $ln eq 'body') {
        $state->{phase} = 'after body';
      } else {
        $self->{onerror}->(node => $child,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($state->{phase} eq 'after body') {
      ## Nothing may follow |body|.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    } else {
      die "check_child_element: Bad |html| phase: $state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    return unless $is_significant;
    $self->{onerror}->(node => $text_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must});
  },
  check_end => sub {
    my ($self, $item, $state) = @_;

    ## Required children that are still missing, keyed by final phase.
    my %missing_in_phase = (
      'after body' => [],
      'before head' => [qw/head body/],
      'after head' => [qw/body/],
    );
    my $missing = $missing_in_phase{$state->{phase}}
        or die "check_end: Bad |html| phase: $state->{phase}";
    for my $name (@$missing) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => $name,
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1841 wakaba 1.25
## The |head| element: metadata content with exactly one |title| child
## and no |style scoped| child.  While the children of |head| are being
## checked, |$self->{flag}->{in_head}| is set to true; the value the
## flag had before is kept in |$element_state->{in_head_original}| and
## put back by |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is allowed.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## |style scoped| is not allowed in |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{level}->{must});
    }

    ## Remember the flag's previous value only once.  This callback
    ## runs for every child; saving unconditionally would, from the
    ## second child on, record the value 1 set below, so |check_end|
    ## would leave the flag set after the |head| element has ended.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'title',
                         level => $self->{level}->{must});
    }

    ## Restore the flag only if it was actually saved (i.e. the element
    ## had at least one child element); otherwise it was never changed
    ## and must not be overwritten.
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1916    
## The |title| element: text-only content; no element-specific
## attribute checkers.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
      dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
1930 wakaba 1.1
## The |base| element: empty content.  At most one |base| is allowed
## per checker run (tracked in |$self->{has_base}|); it needs |href| or
## |target|, and must not come after URL attributes (for |href|) or
## hyperlink elements (for |target|) have already been seen.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    if (not $self->{has_base}) {
      $self->{has_base} = 1;
    } else {
      ## A second |base| element.
      $self->{onerror}->(node => $base_el,
                         type => 'element not allowed:base',
                         level => $self->{level}->{must});
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    if ($has_href and $self->{has_uri_attr}) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $self->{onerror}->(node => $base_el,
                         type => 'basehref after URL attribute',
                         level => $self->{level}->{must});
    }
    if ($has_target and $self->{has_hyperlink_element}) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $self->{onerror}->(node => $base_el,
                         type => 'basetarget after hyperlink',
                         level => $self->{level}->{must});
    }

    unless ($has_href or $has_target) {
      $self->{onerror}->(node => $base_el,
                         type => 'attribute missing:href|target',
                         level => $self->{level}->{must});
    }

    ## |href| of |base| is a base URL reference.
    $element_state->{uri_info}->{href}->{type}->{base} = 1;

    ## Delegate the per-attribute checks to the common attribute checker.
    my $check_attrs = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    });
    return $check_attrs->($self, $item, $element_state);
  },
};
1995    
## The |link| element: empty content; |href| and |rel| are required.
## |sizes| is only allowed with the |icon| link type, and an alternate
## style sheet needs a non-empty |title|.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $link_el = $item->{node};

    ## Set by the |sizes| checker below if the attribute is present.
    my $sizes_attr;

    $GetHTMLAttrsChecker->({
      charset => sub {
        my (undef, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      sizes => sub {
        my ($self, $attr) = @_;
        $sizes_attr = $attr;

        ## Each space-separated token must be unique and be either
        ## |any| or |WIDTHxHEIGHT| with no leading zeros.
        my %seen;
        my @tokens = grep {length $_}
            split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
        for my $token (@tokens) {
          if ($seen{$token}) {
            $self->{onerror}->(node => $attr, type => 'duplicate token',
                               value => $token,
                               level => $self->{level}->{must});
            next;
          }
          $seen{$token} = 1;
          unless ($token eq 'any' or
                  $token =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/) {
            $self->{onerror}->(node => $attr,
                               type => 'sizes:syntax error',
                               value => $token,
                               level => $self->{level}->{must});
          }
        }
      },
      target => $HTMLTargetAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
          ## NOTE: |charset| attribute had been part of HTML5 spec though
          ## it had been commented out.
      href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      methods => FEATURE_HTML20_RFC,
      rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      sizes => FEATURE_HTML5_LC,
      target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      # title: HTML5_WD | HTML5_LC | ...
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    })->($self, $item, $element_state);

    if (not $link_el->has_attribute_ns (undef, 'href')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => 'href',
                         level => $self->{level}->{must});
    } else {
      ## Record that a hyperlink element has been seen.
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    }

    if (not $link_el->has_attribute_ns (undef, 'rel')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => 'rel',
                         level => $self->{level}->{must});
    }

    if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
      $self->{onerror}->(node => $sizes_attr,
                         type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }

    ## An alternate style sheet must have a non-empty |title|.
    if ($element_state->{link_rel}->{alternate} and
        $element_state->{link_rel}->{stylesheet}) {
      my $title_attr = $link_el->get_attribute_node_ns (undef, 'title');
      if (not $title_attr) {
        $self->{onerror}->(node => $link_el,
                           type => 'attribute missing',
                           text => 'title',
                           level => $self->{level}->{must});
      } elsif ($title_attr->value eq '') {
        $self->{onerror}->(node => $title_attr,
                           type => 'empty style sheet title',
                           level => $self->{level}->{must});
      }
    }
  },
};
2100    
2101     $Element->{$HTML_NS}->{meta} = {
2102 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2103 wakaba 1.40 %HTMLEmptyChecker,
2104     check_attrs => sub {
2105     my ($self, $item, $element_state) = @_;
2106 wakaba 1.1 my $name_attr;
2107     my $http_equiv_attr;
2108     my $charset_attr;
2109     my $content_attr;
2110 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2111 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2112     $attr_ns = '' unless defined $attr_ns;
2113     my $attr_ln = $attr->manakai_local_name;
2114     my $checker;
2115 wakaba 1.73 my $status;
2116 wakaba 1.1 if ($attr_ns eq '') {
2117 wakaba 1.73 $status = {
2118     %HTMLAttrStatus,
2119 wakaba 1.82 %XHTML2CommonAttrStatus,
2120 wakaba 1.153 charset => FEATURE_HTML5_WD,
2121     content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2122     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2123     'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2124     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
2125     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2126     name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2127 wakaba 1.73 scheme => FEATURE_M12N10_REC,
2128     }->{$attr_ln};
2129    
2130 wakaba 1.1 if ($attr_ln eq 'content') {
2131     $content_attr = $attr;
2132     $checker = 1;
2133     } elsif ($attr_ln eq 'name') {
2134     $name_attr = $attr;
2135     $checker = 1;
2136     } elsif ($attr_ln eq 'http-equiv') {
2137     $http_equiv_attr = $attr;
2138     $checker = 1;
2139     } elsif ($attr_ln eq 'charset') {
2140     $charset_attr = $attr;
2141     $checker = 1;
2142 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
2143 wakaba 1.71 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2144 wakaba 1.67 $checker = sub {};
2145 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2146     $attr_ln !~ /[A-Z]/) {
2147 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
2148     $status = $HTMLDatasetAttrStatus;
2149 wakaba 1.1 } else {
2150     $checker = $HTMLAttrChecker->{$attr_ln}
2151 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
2152 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
2153     }
2154     } else {
2155     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2156 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
2157     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2158     || $AttrStatus->{$attr_ns}->{''};
2159     $status = FEATURE_ALLOWED if not defined $status;
2160 wakaba 1.1 }
2161 wakaba 1.62
2162 wakaba 1.1 if ($checker) {
2163 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
2164 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
2165 wakaba 1.54 #
2166 wakaba 1.1 } else {
2167 wakaba 1.104 $self->{onerror}->(node => $attr,
2168     type => 'unknown attribute',
2169     level => $self->{level}->{uncertain});
2170 wakaba 1.49 ## ISSUE: No conformance createria for unknown attributes in the spec
2171     }
2172    
2173 wakaba 1.82 $self->_attr_status_info ($attr, $status);
2174 wakaba 1.1 }
2175    
2176     if (defined $name_attr) {
2177     if (defined $http_equiv_attr) {
2178     $self->{onerror}->(node => $http_equiv_attr,
2179 wakaba 1.104 type => 'attribute not allowed',
2180     level => $self->{level}->{must});
2181 wakaba 1.1 } elsif (defined $charset_attr) {
2182     $self->{onerror}->(node => $charset_attr,
2183 wakaba 1.104 type => 'attribute not allowed',
2184     level => $self->{level}->{must});
2185 wakaba 1.1 }
2186     my $metadata_name = $name_attr->value;
2187     my $metadata_value;
2188     if (defined $content_attr) {
2189     $metadata_value = $content_attr->value;
2190     } else {
2191 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2192 wakaba 1.104 type => 'attribute missing',
2193     text => 'content',
2194     level => $self->{level}->{must});
2195 wakaba 1.1 $metadata_value = '';
2196     }
2197     } elsif (defined $http_equiv_attr) {
2198     if (defined $charset_attr) {
2199     $self->{onerror}->(node => $charset_attr,
2200 wakaba 1.104 type => 'attribute not allowed',
2201     level => $self->{level}->{must});
2202 wakaba 1.1 }
2203     unless (defined $content_attr) {
2204 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2205 wakaba 1.104 type => 'attribute missing',
2206     text => 'content',
2207     level => $self->{level}->{must});
2208 wakaba 1.1 }
2209     } elsif (defined $charset_attr) {
2210     if (defined $content_attr) {
2211     $self->{onerror}->(node => $content_attr,
2212 wakaba 1.104 type => 'attribute not allowed',
2213     level => $self->{level}->{must});
2214 wakaba 1.1 }
2215     } else {
2216     if (defined $content_attr) {
2217     $self->{onerror}->(node => $content_attr,
2218 wakaba 1.104 type => 'attribute not allowed',
2219     level => $self->{level}->{must});
2220 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2221 wakaba 1.104 type => 'attribute missing:name|http-equiv',
2222     level => $self->{level}->{must});
2223 wakaba 1.1 } else {
2224 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2225 wakaba 1.104 type => 'attribute missing:name|http-equiv|charset',
2226     level => $self->{level}->{must});
2227 wakaba 1.1 }
2228     }
2229    
2230 wakaba 1.32 my $check_charset_decl = sub () {
2231 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
2232 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2233     for my $el (@{$parent->child_nodes}) {
2234     next unless $el->node_type == 1; # ELEMENT_NODE
2235 wakaba 1.40 unless ($el eq $item->{node}) {
2236 wakaba 1.29 ## NOTE: Not the first child element.
2237 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2238 wakaba 1.32 type => 'element not allowed:meta charset',
2239 wakaba 1.104 level => $self->{level}->{must});
2240 wakaba 1.29 }
2241     last;
2242     ## NOTE: Entity references are not supported.
2243     }
2244     } else {
2245 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2246 wakaba 1.32 type => 'element not allowed:meta charset',
2247 wakaba 1.104 level => $self->{level}->{must});
2248 wakaba 1.29 }
2249    
2250 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
2251     $self->{onerror}->(node => $item->{node},
2252 wakaba 1.32 type => 'in XML:charset',
2253 wakaba 1.104 level => $self->{level}->{must});
2254 wakaba 1.1 }
2255 wakaba 1.32 }; # $check_charset_decl
2256 wakaba 1.21
2257 wakaba 1.32 my $check_charset = sub ($$) {
2258     my ($attr, $charset_value) = @_;
2259 wakaba 1.21
2260 wakaba 1.91 my $charset;
2261     ($charset, $charset_value)
2262     = $HTMLCharsetChecker->($charset_value, $self, $attr);
2263    
2264 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
2265 wakaba 1.21 if (defined $ic) {
2266     ## TODO: Test for this case
2267     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2268     if ($charset ne $ic_charset) {
2269 wakaba 1.32 $self->{onerror}->(node => $attr,
2270 wakaba 1.104 type => 'mismatched charset name',
2271 wakaba 1.106 text => $ic,
2272 wakaba 1.104 value => $charset_value,
2273     level => $self->{level}->{must});
2274 wakaba 1.21 }
2275     } else {
2276     ## NOTE: MUST, but not checkable, since the document is not originally
2277     ## in serialized form (or the parser does not preserve the input
2278     ## encoding information).
2279 wakaba 1.32 $self->{onerror}->(node => $attr,
2280 wakaba 1.104 type => 'mismatched charset name not checked',
2281     value => $charset_value,
2282     level => $self->{level}->{uncertain});
2283 wakaba 1.21 }
2284    
2285 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
2286     $self->{onerror}->(node => $attr,
2287 wakaba 1.104 type => 'charref in charset',
2288     level => $self->{level}->{must},
2289     layer => 'syntax');
2290 wakaba 1.22 }
2291 wakaba 1.32 }; # $check_charset
2292    
2293     ## TODO: metadata conformance
2294    
2295     ## TODO: pragma conformance
2296     if (defined $http_equiv_attr) { ## An enumerated attribute
2297     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
2298 wakaba 1.33
2299 wakaba 1.85 if ($self->{has_http_equiv}->{$keyword}) {
2300     $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2301     node => $http_equiv_attr,
2302 wakaba 1.104 level => $self->{level}->{must});
2303 wakaba 1.85 } else {
2304     $self->{has_http_equiv}->{$keyword} = 1;
2305     }
2306    
2307     if ($keyword eq 'content-type') {
2308 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2309 wakaba 1.33
2310 wakaba 1.32 $check_charset_decl->();
2311     if ($content_attr) {
2312     my $content = $content_attr->value;
2313 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2314 wakaba 1.132 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2315 wakaba 1.58 =(.+)\z!sx) {
2316 wakaba 1.32 $check_charset->($content_attr, $1);
2317     } else {
2318     $self->{onerror}->(node => $content_attr,
2319     type => 'meta content-type syntax error',
2320 wakaba 1.104 level => $self->{level}->{must});
2321 wakaba 1.85 }
2322     }
2323     } elsif ($keyword eq 'default-style') {
2324     ## ISSUE: Not defined yet in the spec.
2325     } elsif ($keyword eq 'refresh') {
2326     if ($content_attr) {
2327     my $content = $content_attr->value;
2328     if ($content =~ /\A[0-9]+\z/) {
2329     ## NOTE: Valid non-negative integer.
2330     #
2331 wakaba 1.132 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2332 wakaba 1.85 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
2333     Whatpm::URIChecker->check_iri_reference ($content, sub {
2334 wakaba 1.104 $self->{onerror}->(value => $content, @_, node => $content_attr);
2335 wakaba 1.106 }, $self->{level});
2336 wakaba 1.85 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URIs".
2337    
2338     $element_state->{uri_info}->{content}->{node} = $content_attr;
2339     $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2340     ## TODO: absolute
2341     push @{$self->{return}->{uri}->{$content} ||= []},
2342     $element_state->{uri_info}->{content};
2343     } else {
2344     $self->{onerror}->(node => $content_attr,
2345     type => 'refresh:syntax error',
2346 wakaba 1.104 level => $self->{level}->{must});
2347 wakaba 1.32 }
2348     }
2349     } else {
2350     $self->{onerror}->(node => $http_equiv_attr,
2351 wakaba 1.104 type => 'enumerated:invalid',
2352     level => $self->{level}->{must});
2353 wakaba 1.32 }
2354     }
2355    
2356     if (defined $charset_attr) {
2357     $check_charset_decl->();
2358     $check_charset->($charset_attr, $charset_attr->value);
2359 wakaba 1.1 }
2360     },
2361     };
2362    
## |style| element.  The |type| attribute (|text/css| when absent)
## decides whether child elements are tolerated and how the text
## collected from the element is handed to the |onsubdoc| callback.
$Element->{$HTML_NS}{style} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: |type| MUST be a styling language
    type => $HTMLIMTAttrChecker,
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is
    ## syntactically not different.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    ## Attributes sharing the same status value are listed together.
    (map { ($_ => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw/dir media title type/),
    disabled => FEATURE_XHTML2_ED,
    href => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    scoped => FEATURE_HTML5_FD,
  }),

  ## Normalizes the |type| value to "type/subtype" (lowercase, no
  ## parameters) and records it in |style_type|; |undef| means the
  ## value could not be parsed.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on content
    ## model.
    my $media_type = $item->{node}->get_attribute_ns (undef, 'type');
    $media_type = 'text/css' if not defined $media_type;

    if ($media_type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      ## NOTE: ASCII case-insensitive.  The captures contain only ASCII
      ## characters, so |lc| touches A-Z only.
      $media_type = lc "$1/$2";
    } else {
      ## NOTE: We don't know how parameters are handled by UAs.  According
      ## to HTML5 specification, <style> with unknown parameters in
      ## |type=""| must be ignored.
      undef $media_type;
    }

    ## Child elements are rejected only for |text/css|; an invalid or
    ## unknown type allows them.
    ## NOTE: There is no definition for "XML-based styling language" in
    ## HTML5, so |+xml| types get no special treatment here.
    $element_state->{allow_element}
        = (defined $media_type and $media_type eq 'text/css') ? 0 : 1;
    $element_state->{style_type} = $media_type;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;

    $element_state->{text} = '';
  },

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
                  $element_state->{allow_element})) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## Text is only accumulated here; it is examined in |check_end|.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $child_node->data;
  },

  check_end => sub {
    my ($self, $item, $element_state) = @_;
    my $style_type = $element_state->{style_type};

    if (not defined $style_type) {
      ## NOTE: Invalid type="" - nothing to hand over.
    } elsif ($style_type =~ m![+/][Xx][Mm][Ll]\z!) {
      ## NOTE: XML content should be checked by THIS instance of checker
      ## as part of normal tree validation.  However, we don't know of any
      ## XML-based styling language that can be used in HTML <style>
      ## element, such that we throw a "style language not supported"
      ## error.
      $self->{onerror}->(node => $item->{node},
                         type => 'XML style lang',
                         text => $style_type,
                         level => $self->{level}->{uncertain});
    } else {
      ## |text/css| and any other non-XML type are passed on as a
      ## subdocument.
      ## NOTE: Should we raise some kind of error for,
      ## say, <style type="text/plaion">?
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => $style_type,
                           is_char_string => 1});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
2468 wakaba 1.25 ## ISSUE: Relationship to significant content check?
2469 wakaba 1.1
## |body| element: flow content plus the presentational color and
## background attributes.
$Element->{$HTML_NS}{body} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    background => $HTMLURIAttrChecker,
    ## All color-valued attributes share one checker.
    (map { ($_ => $HTMLColorAttrChecker) }
         qw/alink bgcolor link text vlink/),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/alink background bgcolor link text vlink/),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw/onload onunload/),
  }),
  ## Records how URI-valued attributes of this element are used.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{background}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
};
2501    
## |section| element: flow content; no element-specific attributes,
## only the common attribute status tables.
$Element->{$HTML_NS}{section} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {%HTMLAttrStatus, %XHTML2CommonAttrStatus},
  ),
};
2511    
## |nav| element: every handler comes from |%HTMLFlowContentChecker|.
$Element->{$HTML_NS}{nav} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
};
2516    
## |article| element: flow content with the |pubdate| attribute, whose
## checker is built for 'global_date_and_time_string'.
$Element->{$HTML_NS}{article} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      pubdate => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      # XXX cite
      pubdate => FEATURE_HTML5_LC,
    },
  ),
}; # article
2528 wakaba 1.1
## |blockquote| element: flow content with a URI-valued |cite|
## attribute.
$Element->{$HTML_NS}{blockquote} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLFlowContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      cite => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  ## Records how URI-valued attributes of this element are used.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
};
2550    
## |aside| element: every handler comes from |%HTMLFlowContentChecker|.
$Element->{$HTML_NS}{aside} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
};
2555    
## |h1| element: phrasing content.  Seeing a heading sets the |has_hn|
## flag, which the |header| checker reads when it ends.
$Element->{$HTML_NS}{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw/left center right justify/
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->{flag}->{has_hn} = 1;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
};
2578    
## |h2|..|h6| each get their own shallow copy of the |h1| definition.
$Element->{$HTML_NS}->{$_} = {%{$Element->{$HTML_NS}->{h1}}}
    for qw/h2 h3 h4 h5 h6/;
2588 wakaba 1.1
2589 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
2590 wakaba 1.174
2591     # XXX footer in header is disallowed (HTML5 revision 3050)
2592 wakaba 1.29
## |header| element: flow content, excluding nested |header|, |footer|
## and sectioning content, that must contain at least one heading.
$Element->{$HTML_NS}{header} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements
        ($element_state,
         {$HTML_NS => {header => 1, footer => 1}},
         $HTMLSectioningContent);

    ## Save the outer "heading seen" flag and start counting afresh
    ## for the content of this element.
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## No heading was seen since |check_start| reset the flag.
    if (not $self->{flag}->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn',
                         level => $self->{level}->{must});
    }
    ## Merge the saved outer flag back in.
    $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
2621    
## |footer| element: flow content, excluding |header|, nested |footer|,
## sectioning content and heading content.
$Element->{$HTML_NS}{footer} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements
        ($element_state,
         {$HTML_NS => +{map { ($_ => 1) } qw/header footer/}},
         $HTMLSectioningContent, $HTMLHeadingContent);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Undo the exclusions registered in |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2642    
## |address| element: flow content, excluding |header|, |footer|, nested
## |address|, sectioning content and heading content.
$Element->{$HTML_NS}{address} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: add test
      #align => $GetHTMLEnumeratedAttrChecker->({
      #  left => 1, center => 1, right => 1, justify => 1,
      #}),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      (map { ($_ => FEATURE_HTML20_RFC) } qw/sdaform sdapref/),
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements
        ($element_state,
         {$HTML_NS => +{map { ($_ => 1) } qw/header footer address/}},
         $HTMLSectioningContent, $HTMLHeadingContent);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Undo the exclusions registered in |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2676    
## |p| element: phrasing content with the enumerated |align| attribute.
$Element->{$HTML_NS}{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw/left center right justify/
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2692    
## |hr| element: empty content model; only attribute status values are
## declared, no attribute-specific checkers yet.
$Element->{$HTML_NS}{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |align|, |noshade|, |size|, |width|
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
           qw/align noshade size width/),
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
};
2709    
## |br| element: empty content model with the enumerated |clear|
## attribute.
$Element->{$HTML_NS}{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      clear => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw/left all right none/
      }),
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_WD | FEATURE_M12N10_REC) }
           qw/class id title/),
      clear => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  ## NOTE: Blank line MUST NOT be used for presentation purpose.
  ## (This requirement is semantic so that we cannot check.)
};
2729    
## |dialog| element: children must be strictly alternating |dt|, |dd|
## pairs.  |$element_state->{phase}| names the element expected next:
## 'before dt' or 'before dd'.
$Element->{$HTML_NS}{dialog} = {
  status => FEATURE_HTML5_WD,
  %HTMLChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; the phase is left untouched.
    } else {
      ## |$wanted| is the element the current phase expects and
      ## |$partner| is the other member of the dt/dd pair.
      my $phase = $element_state->{phase};
      my ($wanted, $partner)
          = $phase eq 'before dt' ? ('dt', 'dd')
          : $phase eq 'before dd' ? ('dd', 'dt')
          : die "check_child_element: Bad |dialog| phase: $element_state->{phase}";

      my $is_html = $child_nsuri eq $HTML_NS;
      if ($is_html and $child_ln eq $wanted) {
        $element_state->{phase} = "before $partner";
      } elsif ($is_html and $child_ln eq $partner) {
        ## The expected element was skipped: report it and keep
        ## waiting for the same element.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing',
                           text => $wanted,
                           level => $self->{level}->{must});
        $element_state->{phase} = $phase;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  ## Significant text directly inside the element is an error.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Ending while a |dd| is still expected means the last |dt| has
    ## no partner.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'dd',
                       level => $self->{level}->{must})
        if $element_state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  },
};
2799    
## |pre| element: phrasing content.  When the element carries the class
## |idl| or |idl-code|, its text content is additionally passed to the
## |onsubdoc| callback as a |text/x-webidl| subdocument.
$Element->{$HTML_NS}->{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## TODO: Flag to enable/disable IDL checking?
    my $class = $item->{node}->get_attribute_ns (undef, 'class');

    ## The attribute may be absent (|undef|), so guard before use.
    ## Class names are compared as whole space-separated tokens; a
    ## word-boundary regexp would also accept names such as |x-idl|
    ## or |idl-foo|, because "-" counts as a word boundary.
    my $is_idl = (defined $class and
                  grep { $_ eq 'idl' or $_ eq 'idl-code' }
                  split /[\x09\x0A\x0C\x0D\x20]+/, $class);
    if ($is_idl) {
      ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
      ## NOTE: pre.code > code.idl-code: WebIDL spec
      ## NOTE: pre.idl-code: DOM1 spec
      ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
      ## NOTE: pre.schema: ReSpec-generated specs
      $self->{onsubdoc}->({s => $item->{node}->text_content,
                           container_node => $item->{node},
                           media_type => 'text/x-webidl',
                           is_char_string => 1});
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2832    
## |ol| element: only |li| children (or elements explicitly allowed via
## |plus_elements|) and no significant text.
$Element->{$HTML_NS}{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    (map { ($_ => $GetHTMLBooleanAttrChecker->($_)) }
         qw/compact reversed/),
    start => $HTMLIntegerAttrChecker,
    ## TODO: HTML4 |type|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    reversed => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    #start => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    start => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) } qw/compact type/),
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
                  ($child_nsuri eq $HTML_NS and $child_ln eq 'li'))) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  ## Significant text directly inside the list is an error.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
2878    
## |ul| element: starts from a copy of the |ol| definition, then
## overrides the status and the attribute checker.
$Element->{$HTML_NS}{ul} = {
  %{$Element->{$HTML_NS}{ol}},
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
      ## TODO: HTML4 |type|
      ## TODO: sdaform, align
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) } qw/compact type/),
    },
  ),
};
2896    
## |dir| element: starts from a copy of the |ul| definition, then
## overrides the status and the attribute checker.
## TODO: %block; is not allowed [HTML4]
## TODO: Empty list allowed?
$Element->{$HTML_NS}{dir} = {
  %{$Element->{$HTML_NS}{ul}},
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      (map { ($_ => FEATURE_HTML20_RFC) } qw/sdaform sdapref/),
    },
  ),
};
2913    
## |li| element: flow content, or phrasing content only while the
## |in_menu| flag is set.  The |value| attribute is reported when the
## parent element is not an HTML |ol|.
$Element->{$HTML_NS}{li} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: HTML4 |type|
    value => sub {
      my ($self, $attr) = @_;

      ## Find out whether the owner element sits directly in an HTML
      ## |ol|; a null namespace is compared as the empty string.
      my $in_ol = 0;
      my $list_el = $attr->owner_element->manakai_parent_element;
      if (defined $list_el) {
        my $list_ns = $list_el->namespace_uri;
        my $list_ln = $list_el->manakai_local_name;
        my $ns_string = defined $list_ns ? $list_ns : '';
        $in_ol = 1 if $ns_string eq $HTML_NS and $list_ln eq 'ol';
      }

      if (not $in_ol) {
        ## ISSUE: No "MUST" in the spec.
        $self->{onerror}->(node => $attr,
                           type => 'non-ol li value',
                           level => $self->{level}->{html5_fact});
      }

      ## The value itself is always checked as an integer.
      $HTMLIntegerAttrChecker->($self, $attr);
    },
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    type => FEATURE_M12N10_REC_DEPRECATED,
    #value => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
    #    FEATURE_M12N10_REC_DEPRECATED,
    value => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
        FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
  }),
  ## Both child handlers delegate, with the unchanged argument list, to
  ## the phrasing checker inside a menu and to the flow checker
  ## otherwise.
  check_child_element => sub {
    my ($self) = @_;
    ## TODO: In <dir> element, then ...
    my $delegate = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
    $delegate->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self) = @_;
    ## TODO: In <dir> element, then ...
    my $delegate = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
    $delegate->{check_child_text}->(@_);
  },
};
2972    
## |dl| element checker.  Children are validated with a small state machine
## kept in $element_state->{phase}: 'before dt' -> 'in dts' -> 'in dds', and
## back to 'in dts' when another |dt| follows a |dd|.
2973     $Element->{$HTML_NS}->{dl} = {
2974 wakaba 1.40 %HTMLChecker,
2975 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2976 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2977     compact => $GetHTMLBooleanAttrChecker->('compact'),
2978     }, {
2979 wakaba 1.49 %HTMLAttrStatus,
2980 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2981 wakaba 1.49 compact => FEATURE_M12N10_REC_DEPRECATED,
2982 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2983 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2984     sdapref => FEATURE_HTML20_RFC,
2985 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2986     }),
2987 wakaba 1.40 check_start => sub {
2988     my ($self, $item, $element_state) = @_;
## No child has been seen yet.
2989     $element_state->{phase} = 'before dt';
2990 wakaba 1.79
2991     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2992     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2993 wakaba 1.40 },
2994     check_child_element => sub {
2995     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2996     $child_is_transparent, $element_state) = @_;
## A child listed in the current minus-elements set that
## $IsInHTMLInteractiveContent accepts is reported as excluded; a child in the
## plus-elements set is accepted without touching the phase.
2997 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2998     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2999 wakaba 1.40 $self->{onerror}->(node => $child_el,
3000     type => 'element not allowed:minus',
3001 wakaba 1.104 level => $self->{level}->{must});
3002 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3003     #
3004     } elsif ($element_state->{phase} eq 'in dds') {
## After a |dd|: another |dd| keeps the phase, a |dt| starts a new group.
3005     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
3006     #$element_state->{phase} = 'in dds';
3007     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
3008     $element_state->{phase} = 'in dts';
3009     } else {
3010 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
3011     level => $self->{level}->{must});
3012 wakaba 1.40 }
3013     } elsif ($element_state->{phase} eq 'in dts') {
## After a |dt|: another |dt| keeps the phase, a |dd| moves to 'in dds'.
3014     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
3015     #$element_state->{phase} = 'in dts';
3016     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
3017     $element_state->{phase} = 'in dds';
3018     } else {
3019 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
3020     level => $self->{level}->{must});
3021 wakaba 1.40 }
3022     } elsif ($element_state->{phase} eq 'before dt') {
## First child: a |dd| with no preceding |dt| is reported, but the phase
## still advances to 'in dds' so that later children are checked normally.
3023     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
3024     $element_state->{phase} = 'in dts';
3025     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
3026     $self->{onerror}
3027 wakaba 1.104 ->(node => $child_el, type => 'ps element missing',
3028     text => 'dt',
3029     level => $self->{level}->{must});
3030 wakaba 1.40 $element_state->{phase} = 'in dds';
3031     } else {
3032 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
3033     level => $self->{level}->{must});
3034 wakaba 1.1 }
3035 wakaba 1.40 } else {
## Any other phase value is an internal error.
3036     die "check_child_element: Bad |dl| phase: $element_state->{phase}";
3037 wakaba 1.1 }
3038 wakaba 1.40 },
3039     check_child_text => sub {
3040     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
## Text flagged as significant by the caller is not allowed directly in |dl|.
3041     if ($has_significant) {
3042 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
3043     level => $self->{level}->{must});
3044 wakaba 1.40 }
3045     },
3046     check_end => sub {
3047     my ($self, $item, $element_state) = @_;
## Ending in the 'in dts' phase means the last |dt| has no |dd| after it.
3048     if ($element_state->{phase} eq 'in dts') {
3049     $self->{onerror}->(node => $item->{node},
3050 wakaba 1.104 type => 'child element missing',
3051     text => 'dd',
3052     level => $self->{level}->{must});
3053 wakaba 1.1 }
3054    
3055 wakaba 1.40 $HTMLChecker{check_end}->(@_);
3056 wakaba 1.1 },
3057     };
3058    
## |dt| element: reuses %HTMLPhrasingContentChecker.  The first table passed
## to $GetHTMLAttrsChecker is empty; the second lists per-attribute status flags.
3059     $Element->{$HTML_NS}->{dt} = {
3060 wakaba 1.40 %HTMLPhrasingContentChecker,
3061 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3062 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3063     %HTMLAttrStatus,
3064 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3065 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3066 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3067 wakaba 1.49 }),
3068 wakaba 1.1 };
3069    
## |dd| element: reuses %HTMLFlowContentChecker (unlike |dt|, which uses the
## phrasing content checker); only attribute status flags are declared.
3070     $Element->{$HTML_NS}->{dd} = {
3071 wakaba 1.72 %HTMLFlowContentChecker,
3072 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3073 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3074     %HTMLAttrStatus,
3075 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3076 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3077 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3078 wakaba 1.49 }),
3079 wakaba 1.1 };
3080    
## |a| element checker.  Based on %HTMLTransparentChecker.  Attributes are
## checked by a hand-written loop (rather than $GetHTMLAttrsChecker) because
## several attributes are only allowed when |href| is present.
3081     $Element->{$HTML_NS}->{a} = {
3082 wakaba 1.123 %HTMLTransparentChecker,
3083 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3084 wakaba 1.40 check_attrs => sub {
3085     my ($self, $item, $element_state) = @_;
## %attr collects the null-namespace attribute nodes that have an entry in
## the element-specific checker table below; it is consulted after the loop.
3086 wakaba 1.1 my %attr;
3087 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
3088 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
3089     $attr_ns = '' unless defined $attr_ns;
3090     my $attr_ln = $attr->manakai_local_name;
3091     my $checker;
3092 wakaba 1.73 my $status;
3093 wakaba 1.1 if ($attr_ns eq '') {
## Status flags for this attribute name (undef if the name is not listed).
3094 wakaba 1.73 $status = {
3095     %HTMLAttrStatus,
3096 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3097 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
3098 wakaba 1.73 charset => FEATURE_M12N10_REC,
3099 wakaba 1.82 coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3100 wakaba 1.73 cryptopts => FEATURE_RFC2659,
3101     dn => FEATURE_RFC2659,
3102 wakaba 1.154 href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
3103 wakaba 1.153 FEATURE_M12N10_REC,
3104     hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
3105     FEATURE_M12N10_REC,
3106     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3107     media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
3108 wakaba 1.73 methods => FEATURE_HTML20_RFC,
3109     name => FEATURE_M12N10_REC_DEPRECATED,
3110     nonce => FEATURE_RFC2659,
3111     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3112     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3113 wakaba 1.153 ping => FEATURE_HTML5_WD,
3114 wakaba 1.154 rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3115     rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3116 wakaba 1.73 sdapref => FEATURE_HTML20_RFC,
3117 wakaba 1.82 shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3118 wakaba 1.73 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3119 wakaba 1.153 target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3120     type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
3121 wakaba 1.73 urn => FEATURE_HTML20_RFC,
3122     }->{$attr_ln};
3123    
## Element-specific value checker for this attribute name, if any.
## NOTE: Both anonymous hashes are rebuilt for every attribute.
3124 wakaba 1.1 $checker = {
3125 wakaba 1.91 charset => sub {
3126     my ($self, $attr) = @_;
3127     $HTMLCharsetChecker->($attr->value, @_);
3128     },
3129 wakaba 1.70 ## TODO: HTML4 |coords|
3130 wakaba 1.1 target => $HTMLTargetAttrChecker,
3131     href => $HTMLURIAttrChecker,
3132     ping => $HTMLSpaceURIsAttrChecker,
3133 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
3134 wakaba 1.92 rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
3135 wakaba 1.70 ## TODO: HTML4 |shape|
3136 wakaba 1.1 media => $HTMLMQAttrChecker,
3137 wakaba 1.70 ## TODO: HTML4/XHTML1 |name|
3138 wakaba 1.1 hreflang => $HTMLLanguageTagAttrChecker,
3139     type => $HTMLIMTAttrChecker,
3140     }->{$attr_ln};
3141     if ($checker) {
3142     $attr{$attr_ln} = $attr;
3143 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
3144     $attr_ln !~ /[A-Z]/) {
## |data-*| attribute whose name contains no uppercase ASCII letter.
3145 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
3146     $status = $HTMLDatasetAttrStatus;
3147 wakaba 1.1 } else {
3148     $checker = $HTMLAttrChecker->{$attr_ln};
3149     }
3150     }
## Fall back to the per-namespace tables: exact local name first, then the
## '' entry of that namespace.
3151     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
3152     || $AttrChecker->{$attr_ns}->{''};
3153 wakaba 1.82 $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
3154     || $AttrStatus->{$attr_ns}->{''};
3155     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
3156 wakaba 1.62
## A checker that is not a reference is treated as "known, nothing to call".
3157 wakaba 1.1 if ($checker) {
3158 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
3159 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
3160 wakaba 1.54 #
3161 wakaba 1.1 } else {
3162 wakaba 1.104 $self->{onerror}->(node => $attr,
3163     type => 'unknown attribute',
3164     level => $self->{level}->{uncertain});
3165 wakaba 1.50 ## ISSUE: No conformance criteria for unknown attributes in the spec
3166 wakaba 1.1 }
3167 wakaba 1.49
3168 wakaba 1.82 $self->_attr_status_info ($attr, $status);
3169 wakaba 1.1 }
3170    
## Remember the enclosing |in_a_href| flag so that |check_end| can restore it.
3171 wakaba 1.40 $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
3172 wakaba 1.4 if (defined $attr{href}) {
3173     $self->{has_hyperlink_element} = 1;
3174 wakaba 1.40 $self->{flag}->{in_a_href} = 1;
3175 wakaba 1.4 } else {
## Without |href|, these attributes are reported as not allowed.
3176 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
3177     if (defined $attr{$_}) {
3178     $self->{onerror}->(node => $attr{$_},
3179 wakaba 1.104 type => 'attribute not allowed',
3180     level => $self->{level}->{must});
3181 wakaba 1.1 }
3182     }
3183     }
3184 wakaba 1.66
3185     $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
3186 wakaba 1.1 },
3187 wakaba 1.40 check_start => sub {
3188     my ($self, $item, $element_state) = @_;
## Register $HTMLInteractiveContent as minus elements while inside |a|.
3189     $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
3190 wakaba 1.79
3191     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3192     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3193 wakaba 1.40 },
3194     check_end => sub {
3195     my ($self, $item, $element_state) = @_;
3196     $self->_remove_minus_elements ($element_state);
## Clear |in_a_href| unless it was already set when this element started.
3197 wakaba 1.59 delete $self->{flag}->{in_a_href}
3198     unless $element_state->{in_a_href_original};
3199 wakaba 1.1
3200 wakaba 1.123 $HTMLTransparentChecker{check_end}->(@_);
3201 wakaba 1.1 },
3202     };
3203    
## |q| element: phrasing content; |cite| is checked with $HTMLURIAttrChecker.
## NOTE: |status| precedes %HTMLPhrasingContentChecker in the hash, so a
## |status| key inside that checker (if any) would override this one.
3204     $Element->{$HTML_NS}->{q} = {
3205 wakaba 1.153 status => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3206 wakaba 1.40 %HTMLPhrasingContentChecker,
3207     check_attrs => $GetHTMLAttrsChecker->({
3208 wakaba 1.50 cite => $HTMLURIAttrChecker,
3209     }, {
3210 wakaba 1.49 %HTMLAttrStatus,
3211 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3212 wakaba 1.153 cite => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3213     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3214 wakaba 1.61 sdapref => FEATURE_HTML2X_RFC,
3215     sdasuff => FEATURE_HTML2X_RFC,
3216 wakaba 1.1 }),
3217 wakaba 1.66 check_start => sub {
3218     my ($self, $item, $element_state) = @_;
3219    
## Record the URI-type flags for the |cite|, |template| and |ref| attributes.
3220     $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
3221 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3222     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3223 wakaba 1.66 },
3224 wakaba 1.1 };
3225 wakaba 1.75 ## TODO: "Quotation punctuation (such as quotation marks), if any, must be
3226     ## placed inside the <code>q</code> element." Though we cannot test the
3227     ## element against this requirement since it includes a semantic bit,
3228     ## it might be possible to inform of the existence of quotation marks OUTSIDE
3229     ## the |q| element.
3230 wakaba 1.1
## |cite| element: phrasing content; no element-specific attribute checkers,
## only per-attribute status flags.
3231     $Element->{$HTML_NS}->{cite} = {
3232 wakaba 1.40 %HTMLPhrasingContentChecker,
3233 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3234 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3235     %HTMLAttrStatus,
3236 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3237 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3238 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3239 wakaba 1.49 }),
3240 wakaba 1.1 };
3241    
## |em| element: phrasing content; no element-specific attribute checkers,
## only per-attribute status flags.
3242     $Element->{$HTML_NS}->{em} = {
3243 wakaba 1.40 %HTMLPhrasingContentChecker,
3244 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3245 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3246     %HTMLAttrStatus,
3247 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3248 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3249 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3250 wakaba 1.49 }),
3251 wakaba 1.1 };
3252    
## |strong| element: phrasing content; same shape as the |em| entry.
3253     $Element->{$HTML_NS}->{strong} = {
3254 wakaba 1.40 %HTMLPhrasingContentChecker,
3255 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3256 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3257     %HTMLAttrStatus,
3258 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3259 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3260 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3261 wakaba 1.49 }),
3262 wakaba 1.1 };
3263    
## |small| element: phrasing content.  Uses %HTMLM12NCommonAttrStatus (not the
## XHTML2 variant used by |em| and |strong|) for the attribute status table.
3264     $Element->{$HTML_NS}->{small} = {
3265 wakaba 1.40 %HTMLPhrasingContentChecker,
3266 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
3267 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3268     %HTMLAttrStatus,
3269     %HTMLM12NCommonAttrStatus,
3270 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3271 wakaba 1.49 }),
3272 wakaba 1.1 };
3273    
## |big| element: phrasing content; element status carries no HTML5 flag.
3274 wakaba 1.51 $Element->{$HTML_NS}->{big} = {
3275     %HTMLPhrasingContentChecker,
3276     status => FEATURE_M12N10_REC,
3277     check_attrs => $GetHTMLAttrsChecker->({}, {
3278     %HTMLAttrStatus,
3279     %HTMLM12NCommonAttrStatus,
3280 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3281 wakaba 1.51 }),
3282     };
3283    
## |mark| element: phrasing content with no overrides of its own.
## NOTE: |status| precedes %HTMLPhrasingContentChecker in the hash, so a
## |status| key inside that checker (if any) would override this one.
3284 wakaba 1.38 $Element->{$HTML_NS}->{mark} = {
3285 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
3286 wakaba 1.40 %HTMLPhrasingContentChecker,
3287 wakaba 1.1 };
3288    
## |dfn| element checker.  Besides the phrasing-content checks, |check_start|
## forbids nested |dfn| and records the element under its defined term in
## $self->{term}.
3289     $Element->{$HTML_NS}->{dfn} = {
3290 wakaba 1.40 %HTMLPhrasingContentChecker,
3291 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3292 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3293     %HTMLAttrStatus,
3294 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3295 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3296 wakaba 1.49 }),
3297 wakaba 1.40 check_start => sub {
3298     my ($self, $item, $element_state) = @_;
## |dfn| itself is added to the minus elements for the duration of this element.
3299     $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});
3300 wakaba 1.1
## Determine the term: the |title| attribute if present; otherwise the
## |title| of an HTML |abbr| child found by the scan below; otherwise the
## element's text content.
3301 wakaba 1.40 my $node = $item->{node};
3302 wakaba 1.1 my $term = $node->get_attribute_ns (undef, 'title');
3303     unless (defined $term) {
3304     for my $child (@{$node->child_nodes}) {
3305     if ($child->node_type == 1) { # ELEMENT_NODE
## An element following an |abbr| that already supplied a term cancels it.
## NOTE(review): A non-|abbr| element child (or an |abbr| without |title|)
## does not stop the scan, so a later |abbr title| sibling can still supply
## the term.  Confirm against the spec's "exactly one element child" rule.
3306     if (defined $term) {
3307     undef $term;
3308     last;
3309     } elsif ($child->manakai_local_name eq 'abbr') {
3310     my $nsuri = $child->namespace_uri;
3311     if (defined $nsuri and $nsuri eq $HTML_NS) {
3312     my $attr = $child->get_attribute_node_ns (undef, 'title');
3313     if ($attr) {
3314     $term = $attr->value;
3315     }
3316     }
3317     }
3318     } elsif ($child->node_type == 3 or $child->node_type == 4) {
3319     ## TEXT_NODE or CDATA_SECTION_NODE
## Whitespace-only text is ignored; any other text cancels the |abbr| term.
3320 wakaba 1.132 if ($child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/) { # Inter-element whitespace
3321 wakaba 1.1 next;
3322     }
3323     undef $term;
3324     last;
3325     }
3326     }
3327     unless (defined $term) {
3328     $term = $node->text_content;
3329     }
3330     }
## $self->{term} maps each term to the list of |dfn| nodes that define it.
3331     if ($self->{term}->{$term}) {
3332     push @{$self->{term}->{$term}}, $node;
3333     } else {
3334     $self->{term}->{$term} = [$node];
3335     }
3336 wakaba 1.77 ## ISSUE: The HTML5 definition for the defined term does not work with
3337     ## |ruby| unless |dfn| has |title|.
3338 wakaba 1.79
3339     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3340     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3341 wakaba 1.40 },
3342     check_end => sub {
3343     my ($self, $item, $element_state) = @_;
## Undo the minus-element registration made in |check_start|.
3344     $self->_remove_minus_elements ($element_state);
3345 wakaba 1.1
3346 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
3347 wakaba 1.1 },
3348     };
3349    
## |abbr| element: phrasing content; |full| is listed with an XHTML2 status
## only.  No element-specific attribute checkers are registered.
3350     $Element->{$HTML_NS}->{abbr} = {
3351 wakaba 1.40 %HTMLPhrasingContentChecker,
3352 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3353 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3354     %HTMLAttrStatus,
3355 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3356     full => FEATURE_XHTML2_ED,
3357 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3358 wakaba 1.49 }),
3359 wakaba 1.77 ## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
3360     ## number (plural vs singular) must match the grammatical number of the
3361     ## contents of the element." Though this can be checked by machine,
3362     ## it requires language-specific knowledge and dictionary, such that
3363     ## we don't support the check of the requirement.
3364     ## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
3365 wakaba 1.49 };
3366    
## |acronym| element: phrasing content; element status carries no HTML5 flag.
3367     $Element->{$HTML_NS}->{acronym} = {
3368     %HTMLPhrasingContentChecker,
3369     status => FEATURE_M12N10_REC,
3370     check_attrs => $GetHTMLAttrsChecker->({}, {
3371     %HTMLAttrStatus,
3372     %HTMLM12NCommonAttrStatus,
3373 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3374 wakaba 1.49 }),
3375 wakaba 1.1 };
3376    
## |time| element checker.  The date/time string is validated in |check_end|:
## it is taken from the |datetime| attribute when present, otherwise from the
## element's text content.  Two forms are recognized by the pattern below:
## "YYYY-MM-DD [T] hh:mm[:ss[.f]] [Z | +hh:mm | -hh:mm]" and "hh:mm[:ss[.f]]".
3377     $Element->{$HTML_NS}->{time} = {
3378 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
3379 wakaba 1.40 %HTMLPhrasingContentChecker,
3380     check_attrs => $GetHTMLAttrsChecker->({
3381 wakaba 1.1 datetime => sub { 1 }, # checked in |checker|
3382 wakaba 1.49 }, {
3383     %HTMLAttrStatus,
3384     %HTMLM12NCommonAttrStatus,
3385 wakaba 1.72 datetime => FEATURE_HTML5_FD,
3386 wakaba 1.1 }),
3387 wakaba 1.168 ## TODO: Update definition
3388 wakaba 1.1 ## TODO: Write tests
3389 wakaba 1.40 check_end => sub {
3390     my ($self, $item, $element_state) = @_;
3391 wakaba 1.1
## $input is the string to parse, $reg_sp the whitespace pattern allowed
## around its parts, and $input_node the node that errors are reported on.
## The attribute form allows only TAB/LF/FF/CR/SPACE; the text-content form
## allows any \p{WhiteSpace} character.
3392 wakaba 1.40 my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
3393 wakaba 1.1 my $input;
3394     my $reg_sp;
3395     my $input_node;
3396     if ($attr) {
3397     $input = $attr->value;
3398 wakaba 1.132 $reg_sp = qr/[\x09\x0A\x0C\x0D\x20]*/;
3399 wakaba 1.1 $input_node = $attr;
3400     } else {
3401 wakaba 1.40 $input = $item->{node}->text_content;
3402 wakaba 1.112 $reg_sp = qr/\p{WhiteSpace}*/;
3403 wakaba 1.40 $input_node = $item->{node};
3404 wakaba 1.1
3405     ## ISSUE: What is the definition for "successfully extracts a date
3406     ## or time"? If the algorithm says the string is invalid but
3407     ## return some date or time, is it "successfully"?
3408     }
3409    
3410     my $hour;
3411     my $minute;
3412     my $second;
## The pattern only captures digit runs; field lengths and value ranges are
## checked afterwards so that each problem gets its own error type.
## Captures: 1 = year (or hour in the time-only form), 2 = month, 3 = day,
## 4 = hour, 5 = minute, 6 = second, 7/8 = time zone offset hour/minute,
## 9 = minute and 10 = second of the time-only form.
3413     if ($input =~ /
3414     \A
3415 wakaba 1.112 $reg_sp
3416 wakaba 1.1 ([0-9]+) # 1
3417     (?>
3418     -([0-9]+) # 2
3419 wakaba 1.112 -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
3420     $reg_sp
3421 wakaba 1.1 (?>
3422     T
3423 wakaba 1.112 $reg_sp
3424 wakaba 1.1 )?
3425     ([0-9]+) # 4
3426     :([0-9]+) # 5
3427     (?>
3428     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
3429     )?
3430 wakaba 1.112 $reg_sp
3431 wakaba 1.1 (?>
3432     Z
3433 wakaba 1.112 $reg_sp
3434 wakaba 1.1 |
3435     [+-]([0-9]+):([0-9]+) # 7, 8
3436 wakaba 1.112 $reg_sp
3437 wakaba 1.1 )?
3438     \z
3439     |
3440     :([0-9]+) # 9
3441     (?>
3442     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
3443     )?
3444 wakaba 1.112 $reg_sp
3445     \z
3446 wakaba 1.1 )
3447     /x) {
3448     if (defined $2) { ## YYYY-MM-DD T? hh:mm
## Field widths must be exactly 4-2-2 for the date and 2:2 for the time.
3449     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
3450     length $4 != 2 or length $5 != 2) {
3451     $self->{onerror}->(node => $input_node,
3452 wakaba 1.104 type => 'dateortime:syntax error',
3453     level => $self->{level}->{must});
3454 wakaba 1.1 }
3455    
## Day range is looked up per month (index 0 is a placeholder; February
## allows 29), then 29 February is rejected separately for non-leap years.
3456     if (1 <= $2 and $2 <= 12) {
3457 wakaba 1.104 $self->{onerror}->(node => $input_node, type => 'datetime:bad day',
3458     level => $self->{level}->{must})
3459 wakaba 1.1 if $3 < 1 or
3460     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
3461 wakaba 1.104 $self->{onerror}->(node => $input_node, type => 'datetime:bad day',
3462     level => $self->{level}->{must})
3463 wakaba 1.1 if $2 == 2 and $3 == 29 and
3464     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
3465     } else {
3466     $self->{onerror}->(node => $input_node,
3467 wakaba 1.104 type => 'datetime:bad month',
3468     level => $self->{level}->{must});
3469 wakaba 1.1 }
3470    
3471     ($hour, $minute, $second) = ($4, $5, $6);
3472    
3473     if (defined $7) { ## [+-]hh:mm
3474     if (length $7 != 2 or length $8 != 2) {
3475     $self->{onerror}->(node => $input_node,
3476 wakaba 1.104 type => 'dateortime:syntax error',
3477     level => $self->{level}->{must});
3478 wakaba 1.1 }
3479    
3480     $self->{onerror}->(node => $input_node,
3481 wakaba 1.104 type => 'datetime:bad timezone hour',
3482     level => $self->{level}->{must})
3483 wakaba 1.1 if $7 > 23;
3484     $self->{onerror}->(node => $input_node,
3485 wakaba 1.104 type => 'datetime:bad timezone minute',
3486     level => $self->{level}->{must})
3487 wakaba 1.1 if $8 > 59;
3488     }
3489     } else { ## hh:mm
## Time-only form: capture 1 is the hour here, not a year.
3490     if (length $1 != 2 or length $9 != 2) {
3491     $self->{onerror}->(node => $input_node,
3492 wakaba 1.104 type => qq'dateortime:syntax error',
3493     level => $self->{level}->{must});
3494 wakaba 1.1 }
3495    
3496     ($hour, $minute, $second) = ($1, $9, $10);
3497     }
3498    
## Range checks shared by both forms.
3499 wakaba 1.104 $self->{onerror}->(node => $input_node, type => 'datetime:bad hour',
3500     level => $self->{level}->{must}) if $hour > 23;
3501     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute',
3502     level => $self->{level}->{must}) if $minute > 59;
3503 wakaba 1.1
3504     if (defined $second) { ## s
3505     ## NOTE: Integer part of second don't have to have length of two.
3506    
## The pattern accepts a seconds field that starts with "." (no integer
## part) only so that it can be reported here as a syntax error.
3507     if (substr ($second, 0, 1) eq '.') {
3508     $self->{onerror}->(node => $input_node,
3509 wakaba 1.104 type => 'dateortime:syntax error',
3510     level => $self->{level}->{must});
3511 wakaba 1.1 }
3512    
3513 wakaba 1.104 $self->{onerror}->(node => $input_node, type => 'datetime:bad second',
3514     level => $self->{level}->{must}) if $second >= 60;
3515 wakaba 1.1 }
3516     } else {
## The string matches neither recognized form.
3517     $self->{onerror}->(node => $input_node,
3518 wakaba 1.104 type => 'dateortime:syntax error',
3519     level => $self->{level}->{must});
3520 wakaba 1.1 }
3521    
3522 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
3523 wakaba 1.1 },
3524     };
3525    
## |meter| element: phrasing content.  Each of the six numeric attributes gets
## a floating-point checker whose callback always returns true, so no range
## restriction is applied through that callback.
3526     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
3527 wakaba 1.77 ## TODO: value inequalities (HTML5 revision 1463)
3528 wakaba 1.113 ## TODO: content checking
3529     ## TODO: content or value must contain number (rev 2053)
3530 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
3531 wakaba 1.40 %HTMLPhrasingContentChecker,
3532     check_attrs => $GetHTMLAttrsChecker->({
3533 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3534     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3535     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3536     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3537     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3538     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3539 wakaba 1.50 }, {
3540     %HTMLAttrStatus,
3541     high => FEATURE_HTML5_DEFAULT,
3542     low => FEATURE_HTML5_DEFAULT,
3543     max => FEATURE_HTML5_DEFAULT,
3544     min => FEATURE_HTML5_DEFAULT,
3545     optimum => FEATURE_HTML5_DEFAULT,
3546     value => FEATURE_HTML5_DEFAULT,
3547 wakaba 1.1 }),
3548     };
3549    
## |progress| element: phrasing content.  The callbacks passed to the
## floating-point checker test |value| >= 0 and |max| > 0 on their first
## argument (presumably the parsed number; confirm in
## $GetHTMLFloatingPointNumberAttrChecker).
3550     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
3551 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
3552 wakaba 1.40 %HTMLPhrasingContentChecker,
3553     check_attrs => $GetHTMLAttrsChecker->({
3554 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
3555     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
3556 wakaba 1.50 }, {
3557     %HTMLAttrStatus,
3558     max => FEATURE_HTML5_DEFAULT,
3559     value => FEATURE_HTML5_DEFAULT,
3560 wakaba 1.1 }),
3561     };
3562    
## |code| element: phrasing content; no element-specific attribute checkers,
## only per-attribute status flags.
3563     $Element->{$HTML_NS}->{code} = {
3564 wakaba 1.40 %HTMLPhrasingContentChecker,
3565 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3566 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3567     %HTMLAttrStatus,
3568 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3569 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3570 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3571 wakaba 1.49 }),
3572 wakaba 1.1 };
3573    
## |var| element: phrasing content; same shape as the |code| entry.
3574     $Element->{$HTML_NS}->{var} = {
3575 wakaba 1.40 %HTMLPhrasingContentChecker,
3576 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3577 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3578     %HTMLAttrStatus,
3579 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3580 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3581 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3582 wakaba 1.49 }),
3583 wakaba 1.1 };
3584    
## |samp| element: phrasing content; same shape as the |code| entry.
3585     $Element->{$HTML_NS}->{samp} = {
3586 wakaba 1.40 %HTMLPhrasingContentChecker,
3587 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3588 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3589     %HTMLAttrStatus,
3590 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3591 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3592 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3593 wakaba 1.49 }),
3594 wakaba 1.1 };
3595    
## |kbd| element: phrasing content; same shape as the |code| entry.
3596     $Element->{$HTML_NS}->{kbd} = {
3597 wakaba 1.40 %HTMLPhrasingContentChecker,
3598 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3599 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3600     %HTMLAttrStatus,
3601 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3602 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3603 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3604 wakaba 1.49 }),
3605 wakaba 1.1 };
3606    
## |sub| element: phrasing content; only per-attribute status flags.
3607     $Element->{$HTML_NS}->{sub} = {
3608 wakaba 1.40 %HTMLPhrasingContentChecker,
3609 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3610 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3611     %HTMLAttrStatus,
3612 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3613 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3614 wakaba 1.61 sdapref => FEATURE_HTML2X_RFC,
3615 wakaba 1.49 }),
3616 wakaba 1.1 };
3617    
## |sup| shares the very same definition hash (same reference) as |sub|.
3618 wakaba 1.51 $Element->{$HTML_NS}->{sup} = $Element->{$HTML_NS}->{sub};
3619 wakaba 1.1
## |span| element: phrasing content.  The |datafld|, |dataformatas| and
## |datasrc| attributes are listed with the HTML4 "reserved" status.
3620     $Element->{$HTML_NS}->{span} = {
3621 wakaba 1.40 %HTMLPhrasingContentChecker,
3622 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3623 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3624     %HTMLAttrStatus,
3625 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3626 wakaba 1.49 datafld => FEATURE_HTML4_REC_RESERVED,
3627     dataformatas => FEATURE_HTML4_REC_RESERVED,
3628     datasrc => FEATURE_HTML4_REC_RESERVED,
3629 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3630 wakaba 1.61 sdaform => FEATURE_HTML2X_RFC,
3631 wakaba 1.49 }),
3632 wakaba 1.1 };
3633    
## |i| element: phrasing content; only per-attribute status flags.
3634     $Element->{$HTML_NS}->{i} = {
3635 wakaba 1.40 %HTMLPhrasingContentChecker,
3636 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3637     check_attrs => $GetHTMLAttrsChecker->({}, {
3638     %HTMLAttrStatus,
3639     %HTMLM12NCommonAttrStatus,
3640 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3641 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3642 wakaba 1.49 }),
3643 wakaba 1.1 };
3644    
## |b| shares the very same definition hash (same reference) as |i|.
3645 wakaba 1.51 $Element->{$HTML_NS}->{b} = $Element->{$HTML_NS}->{i};
3646    
## |tt| element: phrasing content; element status carries no HTML5 flag.
3647 wakaba 1.61 $Element->{$HTML_NS}->{tt} = {
3648     %HTMLPhrasingContentChecker,
3649     status => FEATURE_M12N10_REC,
3650     check_attrs => $GetHTMLAttrsChecker->({}, {
3651     %HTMLAttrStatus,
3652     %HTMLM12NCommonAttrStatus,
3653 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3654 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3655     }),
3656     };
3657 wakaba 1.51
## |s| element: phrasing content; element status is the deprecated M12N flag.
3658     $Element->{$HTML_NS}->{s} = {
3659 wakaba 1.40 %HTMLPhrasingContentChecker,
3660 wakaba 1.51 status => FEATURE_M12N10_REC_DEPRECATED,
3661 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3662     %HTMLAttrStatus,
3663     %HTMLM12NCommonAttrStatus,
3664 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3665 wakaba 1.49 }),
3666 wakaba 1.1 };
3667    
## |strike| and |u| share the very same definition hash (same reference) as |s|.
3668 wakaba 1.51 $Element->{$HTML_NS}->{strike} = $Element->{$HTML_NS}->{s};
3669    
3670     $Element->{$HTML_NS}->{u} = $Element->{$HTML_NS}->{s};
3671    
## |bdo| element: phrasing content.  Attributes are checked through
## $GetHTMLAttrsChecker, and the element additionally requires a |dir|
## attribute in the null namespace.
3672 wakaba 1.1 $Element->{$HTML_NS}->{bdo} = {
3673 wakaba 1.40 %HTMLPhrasingContentChecker,
3674 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3675 wakaba 1.40 check_attrs => sub {
3676     my ($self, $item, $element_state) = @_;
## NOTE: The generic checker is built anew on every call and invoked at once.
3677 wakaba 1.49 $GetHTMLAttrsChecker->({}, {
3678     %HTMLAttrStatus,
3679 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
3680     dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
3681     id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
3682     style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3683     title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
3684     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3685 wakaba 1.61 sdapref => FEATURE_HTML2X_RFC,
3686     sdasuff => FEATURE_HTML2X_RFC,
3687 wakaba 1.49 })->($self, $item, $element_state);
## A missing |dir| is reported on the element node itself.
3688 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'dir')) {
3689     $self->{onerror}->(node => $item->{node},
3690 wakaba 1.104 type => 'attribute missing',
3691     text => 'dir',
3692     level => $self->{level}->{must});
3693 wakaba 1.1 }
3694     },
3695     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
3696     };
3697    
3698 wakaba 1.99 $Element->{$HTML_NS}->{ruby} = {
3699     %HTMLPhrasingContentChecker,
3700     status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
3701     check_attrs => $GetHTMLAttrsChecker->({}, {
3702     %HTMLAttrStatus,
3703     %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
3704 wakaba 1.153 lang => FEATURE_HTML5_WD,
3705 wakaba 1.99 }),
3706     check_start => sub {
3707     my ($self, $item, $element_state) = @_;
3708    
3709     $element_state->{phase} = 'before-rb';
3710     #$element_state->{has_sig}
3711 wakaba 1.100
3712     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3713     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3714 wakaba 1.99 },
3715     ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
3716     check_child_element => sub {
3717     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3718     $child_is_transparent, $element_state) = @_;
3719 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
3720     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
3721 wakaba 1.99 $self->{onerror}->(node => $child_el,
3722     type => 'element not allowed:minus',
3723 wakaba 1.104 level => $self->{level}->{must});
3724 wakaba 1.99 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3725     #
3726     } elsif ($element_state->{phase} eq 'before-rb') {
3727     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3728     $element_state->{phase} = 'in-rb';
3729     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3730     $self->{onerror}->(node => $child_el,
3731 wakaba 1.104 level => $self->{level}->{should},
3732     type => 'no significant content before');
3733 wakaba 1.99 $element_state->{phase} = 'after-rt';
3734     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3735     $self->{onerror}->(node => $child_el,
3736 wakaba 1.104 level => $self->{level}->{should},
3737     type => 'no significant content before');
3738 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3739     } else {
3740     $self->{onerror}->(node => $child_el,
3741 wakaba 1.104 type => 'element not allowed:ruby base',
3742     level => $self->{level}->{must});
3743 wakaba 1.99 $element_state->{phase} = 'in-rb';
3744     }
3745     } elsif ($element_state->{phase} eq 'in-rb') {
3746     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3747     #$element_state->{phase} = 'in-rb';
3748     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3749     unless ($element_state->{has_significant}) {
3750     $self->{onerror}->(node => $child_el,
3751 wakaba 1.104 level => $self->{level}->{should},
3752     type => 'no significant content before');
3753 wakaba 1.99 }
3754     $element_state->{phase} = 'after-rt';
3755     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3756     unless ($element_state->{has_significant}) {
3757     $self->{onerror}->(node => $child_el,
3758 wakaba 1.104 level => $self->{level}->{should},
3759     type => 'no significant content before');
3760 wakaba 1.99 }
3761     $element_state->{phase} = 'after-rp1';
3762     } else {
3763     $self->{onerror}->(node => $child_el,
3764 wakaba 1.104 type => 'element not allowed:ruby base',
3765     level => $self->{level}->{must});
3766 wakaba 1.99 #$element_state->{phase} = 'in-rb';
3767     }
3768     } elsif ($element_state->{phase} eq 'after-rt') {
3769     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3770     if ($element_state->{has_significant}) {
3771     $element_state->{has_sig} = 1;
3772     delete $element_state->{has_significant};
3773     }
3774     $element_state->{phase} = 'in-rb';
3775     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3776     $self->{onerror}->(node => $child_el,
3777 wakaba 1.104 level => $self->{level}->{should},
3778     type => 'no significant content before');
3779 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3780     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3781     $self->{onerror}->(node => $child_el,
3782 wakaba 1.104 level => $self->{level}->{should},
3783     type => 'no significant content before');
3784 wakaba 1.99 #$element_state->{phase} = 'after-rt';
3785     } else {
3786     $self->{onerror}->(node => $child_el,
3787 wakaba 1.104 type => 'element not allowed:ruby base',
3788     level => $self->{level}->{must});
3789 wakaba 1.99 if ($element_state->{has_significant}) {
3790     $element_state->{has_sig} = 1;
3791     delete $element_state->{has_significant};
3792     }
3793     $element_state->{phase} = 'in-rb';
3794     }
3795     } elsif ($element_state->{phase} eq 'after-rp1') {
3796     if ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3797     $element_state->{phase} = 'after-rp-rt';
3798     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3799     $self->{onerror}->(node => $child_el,
3800 wakaba 1.104 type => 'ps element missing',
3801     text => 'rt',
3802     level => $self->{level}->{must});
3803 wakaba 1.99 $element_state->{phase} = 'after-rp2';
3804     } else {
3805     $self->{onerror}->(node => $child_el,
3806 wakaba 1.104 type => 'ps element missing',
3807     text => 'rt',
3808     level => $self->{level}->{must});
3809 wakaba 1.99 $self->{onerror}->(node => $child_el,
3810 wakaba 1.104 type => 'ps element missing',
3811     text => 'rp',
3812     level => $self->{level}->{must});
3813 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3814     $self->{onerror}->(node => $child_el,
3815 wakaba 1.104 type => 'element not allowed:ruby base',
3816     level => $self->{level}->{must});
3817 wakaba 1.99 }
3818     if ($element_state->{has_significant}) {
3819     $element_state->{has_sig} = 1;
3820     delete $element_state->{has_significant};
3821     }
3822     $element_state->{phase} = 'in-rb';
3823     }
3824     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3825     if ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3826     $element_state->{phase} = 'after-rp2';
3827     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3828     $self->{onerror}->(node => $child_el,
3829 wakaba 1.104 type => 'ps element missing',
3830     text => 'rp',
3831     level => $self->{level}->{must});
3832 wakaba 1.99 $self->{onerror}->(node => $child_el,
3833 wakaba 1.104 level => $self->{level}->{should},
3834     type => 'no significant content before');
3835 wakaba 1.99 $element_state->{phase} = 'after-rt';
3836     } else {
3837     $self->{onerror}->(node => $child_el,
3838 wakaba 1.104 type => 'ps element missing',
3839     text => 'rp',
3840     level => $self->{level}->{must});
3841 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3842     $self->{onerror}->(node => $child_el,
3843 wakaba 1.104 type => 'element not allowed:ruby base',
3844     level => $self->{level}->{must});
3845 wakaba 1.99 }
3846     if ($element_state->{has_significant}) {
3847     $element_state->{has_sig} = 1;
3848     delete $element_state->{has_significant};
3849     }
3850     $element_state->{phase} = 'in-rb';
3851     }
3852     } elsif ($element_state->{phase} eq 'after-rp2') {
3853     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3854     if ($element_state->{has_significant}) {
3855     $element_state->{has_sig} = 1;
3856     delete $element_state->{has_significant};
3857     }
3858     $element_state->{phase} = 'in-rb';
3859     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3860     $self->{onerror}->(node => $child_el,
3861 wakaba 1.104 level => $self->{level}->{should},
3862     type => 'no significant content before');
3863 wakaba 1.99 $element_state->{phase} = 'after-rt';
3864     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3865     $self->{onerror}->(node => $child_el,
3866 wakaba 1.104 level => $self->{level}->{should},
3867     type => 'no significant content before');
3868 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3869     } else {
3870     $self->{onerror}->(node => $child_el,
3871 wakaba 1.104 type => 'element not allowed:ruby base',
3872     level => $self->{level}->{must});
3873 wakaba 1.99 if ($element_state->{has_significant}) {
3874     $element_state->{has_sig} = 1;
3875     delete $element_state->{has_significant};
3876     }
3877     $element_state->{phase} = 'in-rb';
3878     }
3879     } else {
3880     die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
3881     }
3882     },
check_child_text => sub {
  ## Advance the |ruby| phase machine when a text child is seen.
  ## Only significant text matters; it always starts (or continues)
  ## a ruby base, so the phase ends up as |in-rb|.
  my ($self, $item, $child_node, $has_significant, $element_state) = @_;
  return unless $has_significant;

  ## For each known phase, the elements that should have appeared
  ## before this text (reported in this order).
  my %missing_before_text = (
    'before-rb'   => [],
    'in-rb'       => [],
    'after-rt'    => [],
    'after-rp2'   => [],
    'after-rp1'   => ['rt', 'rp'],
    'after-rp-rt' => ['rp'],
  );

  my $phase = $element_state->{phase};
  my $missing = $missing_before_text{$phase}
      or die "check_child_text: Bad |ruby| phase: $phase";

  for my $expected_ln (@$missing) {
    $self->{onerror}->(node => $child_node,
                       type => 'ps element missing',
                       text => $expected_ln,
                       level => $self->{level}->{must});
  }

  $element_state->{phase} = 'in-rb';
},
check_end => sub {
  ## Final checks for a |ruby| element, driven by the phase the
  ## child checks left in |$element_state->{phase}|:
  ##   - report a missing |rt| / |rp| when the ruby ended before the
  ##     annotation part was complete;
  ##   - report a missing (or trailing empty) ruby base;
  ##   - propagate the "has significant content" flag to the parent.
  my ($self, $item, $element_state) = @_;
  $self->_remove_minus_elements ($element_state);

  my $phase = $element_state->{phase};
  if ($phase eq 'before-rb') {
    ## Nothing at all was found in the element.
    $self->{onerror}->(node => $item->{node},
                       level => $self->{level}->{should},
                       type => 'no significant content');
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($phase eq 'in-rb') {
    ## A ruby base was started but never followed by |rt|.
    unless ($element_state->{has_significant}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content at the end');
    }
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($phase eq 'after-rt' or $phase eq 'after-rp2') {
    ## Complete: ended right after |rt| or after the closing |rp|.
  } elsif ($phase eq 'after-rp1') {
    ## Opening |rp| only: both |rt| and the closing |rp| are missing.
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } elsif ($phase eq 'after-rp-rt') {
    ## |rp| |rt| seen, closing |rp| is missing.
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } else {
    ## Internal error.  The message names this callback (it used to
    ## say |check_child_text|, which pointed at the wrong sub).
    die "check_end: Bad |ruby| phase: $element_state->{phase}";
  }

  ## NOTE: A modified version of |check_end| of %AnyChecker.
  ## |has_sig| remembers significant content from ruby bases that
  ## were already closed by a later child.
  if ($element_state->{has_significant} or $element_state->{has_sig}) {
    $item->{real_parent_state}->{has_significant} = 1;
  }
},
3962     };
3963    
## |rt|: checked as phrasing content; only the common attributes
## (plus |lang|) have a recorded status.
$Element->{$HTML_NS}->{rt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checkers.
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD,
    },
  ),
}; # rt
3973    
## |rp|: checked as phrasing content; only the common attributes
## (plus |lang|) have a recorded status.
$Element->{$HTML_NS}->{rp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checkers.
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD,
    },
  ),
}; # rp
3983 wakaba 1.99
3984 wakaba 1.29 =pod
3985    
3986     ## TODO:
3987    
3988     +
3989     + <p>Partly because of the confusion described above, authors are
3990     + strongly recommended to always mark up all paragraphs with the
3991     + <code>p</code> element, and to not have any <code>ins</code> or
3992     + <code>del</code> elements that cross across any <span
3993     + title="paragraph">implied paragraphs</span>.</p>
3994     +
3995     (An informative note)
3996    
3997     <p><code>ins</code> elements should not cross <span
3998     + title="paragraph">implied paragraph</span> boundaries.</p>
3999     (normative)
4000    
4001     + <p><code>del</code> elements should not cross <span
4002     + title="paragraph">implied paragraph</span> boundaries.</p>
4003     (normative)
4004    
4005     =cut
4006    
## |ins|: transparent content model; |cite| must be a URI and
## |datetime| a global date and time string.
$Element->{$HTML_NS}->{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    ## Record how the URI-valued attributes of this element are used.
    my ($self, $item, $element_state) = @_;
    my $uri_info = ($element_state->{uri_info} ||= {});

    $uri_info->{cite}->{type}->{cite} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
}; # ins
4028    
## |del|: transparent content model; |cite| must be a URI and
## |datetime| a global date and time string.
$Element->{$HTML_NS}->{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    ## Record how the URI-valued attributes of this element are used.
    my ($self, $item, $element_state) = @_;
    my $uri_info = ($element_state->{uri_info} ||= {});

    $uri_info->{cite}->{type}->{cite} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate to the parent,
    ## and nothing is reported when the element is being treated as
    ## transparent.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
}; # del
4062    
## |figure|: flow content with an optional |legend| as either the
## first or the last child.
## NOTE: legend, Flow | Flow, legend?
##
## State flags kept in |$element_state|:
##   has_legend_at_first - a |legend| was the first thing seen;
##   has_legend          - node of the latest non-first |legend|;
##   has_non_legend      - non-|legend| content was seen since the
##                         last |legend| (or since the start).
$Element->{$HTML_NS}->{figure} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Report a |legend| that is in a disallowed position.
    my $misplaced_legend = sub {
      $self->{onerror}->(node => $_[0],
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; no effect on the legend state.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| already exists: this one is extra.
        $misplaced_legend->($child_el);
      } elsif (my $earlier_legend = $element_state->{has_legend}) {
        ## The earlier |legend| turned out not to be the last child.
        $misplaced_legend->($earlier_legend);
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        ## Candidate trailing |legend|; validated in |check_end|.
        $element_state->{has_legend} = $child_el;
      } else {
        $element_state->{has_legend_at_first} = 1;
      }
      delete $element_state->{has_non_legend};
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;

    ## NOTE(review): |in_figure| is only set once a text child has
    ## been seen; confirm this is the intended place for it.
    $element_state->{in_figure} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A non-first |legend| is only valid as the last child, i.e.
    ## with no other content after it.
    if (not $element_state->{has_legend_at_first} and
        $element_state->{has_legend} and
        $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
}; # figure
4124 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
4125 wakaba 1.1
## Attribute checker for attributes whose value syntax is not checked
## here: always reports the attribute as 'unknown attribute' at the
## "uncertain" level, regardless of its value.
my $AttrCheckerNotImplemented = sub {
  my ($self, $attr_node) = @_;
  my @report = (
    node => $attr_node,
    type => 'unknown attribute',
    level => $self->{level}->{uncertain},
  );
  $self->{onerror}->(@report);
};
4132    
## |img|: empty element.  Attribute values are validated by the
## per-attribute checkers below; in addition a missing |alt| is
## reported at the "should" level and a missing |src| at the "must"
## level.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checkers = {
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      alt => sub { }, ## NOTE: No syntactical requirement
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      ismap => sub {
        ## |ismap| is reported unless the |in_a_href| flag is set; the
        ## usual boolean attribute check is applied in either case.
        my ($self, $attr, $parent_item) = @_;
        unless ($self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{level}->{must});
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      longdesc => $HTMLURIAttrChecker,
      ## TODO: HTML4 |name|
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    };
    my $attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      src => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    };
    $GetHTMLAttrsChecker->($attr_checkers, $attr_status)
        ->($self, $item, $element_state);

    ## Attributes that are expected to be present, with the level at
    ## which their absence is reported.
    ## TODO: ... (|alt|)
    for my $expected ([alt => 'should'], [src => 'must']) {
      my ($attr_name, $level_name) = @$expected;
      next if $item->{node}->has_attribute_ns (undef, $attr_name);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $attr_name,
                         level => $self->{level}->{$level_name});
    }

    ## TODO: external resource check

    ## Record how the URI-valued attributes of this element are used.
    $element_state->{uri_info}->{$_}->{type}->{embedded} = 1
        for qw(src lowsrc dynsrc);
    $element_state->{uri_info}->{longdesc}->{type}->{cite} = 1;
  },
}; # img
4201    
## |iframe|: text content model.
## NOTE: Not part of M12N10 Strict
##
## The boolean attribute is spelled |seamless| in HTML5.  It used to
## be registered here as |seemless|, so the real attribute was treated
## as unknown and the misspelled one was accepted.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    name => $HTMLBrowsingContextNameAttrChecker,
    sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
      'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
    }),
    seamless => $GetHTMLBooleanAttrChecker->('seamless'),
    src => $HTMLURIAttrChecker,
    width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sandbox => FEATURE_HTML5_WD,
    scrolling => FEATURE_M12N10_REC,
    seamless => FEATURE_HTML5_WD,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    ## Record how the URI-valued attributes of this element are used.
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
}; # iframe
4243    
## |embed|: empty element.  Unlike most elements it accepts any
## lowercase, non-|xml|-prefixed local attribute name, so the
## attributes are walked by hand instead of going through
## |$GetHTMLAttrsChecker|.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $src_seen;

    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;

      ## Default status, looked up by local name only.
      my $status = {
        %HTMLAttrStatus,
        height => FEATURE_HTML5_LC,
        src => FEATURE_HTML5_WD,
        type => FEATURE_HTML5_WD,
        width => FEATURE_HTML5_LC,
      }->{$ln};

      my $checker;
      if ($ns eq '') {
        if ($ln eq 'src') {
          $src_seen = 1;
          $checker = $HTMLURIAttrChecker;
        } elsif ($ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } elsif ($ln eq 'width' or $ln eq 'height') {
          ## TODO: because spec does not define them yet.
          $checker = $AttrCheckerNotImplemented;
        } elsif ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $ln !~ /[A-Z]/) {
          ## |data-*| attribute.
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } elsif ($ln !~ /^[Xx][Mm][Ll]/ and
                 $ln !~ /[A-Z]/ and
                 $ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
          ## NOTE: Any local attribute is ok.
          $checker = $HTMLAttrChecker->{$ln} || sub { };
          $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
        } else {
          $checker = $HTMLAttrChecker->{$ln};
        }
      }

      ## Fall back to the per-namespace tables (exact name first,
      ## then the namespace-wide default under the empty key).
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($ns ne '' or $status) {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for global attributes in the spec
      }
      ## (A null-namespace attribute with neither checker nor status
      ## is not reported here.)

      $self->_attr_status_info ($attr, $status);
    }

    unless ($src_seen) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{info});
      ## NOTE: <embed> without src="" is allowed since revision 1929.
      ## We issue an informational message since <embed> w/o src=""
      ## is likely an authoring error.
    }

    ## TODO: external resource check

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
}; # embed
4321    
4322 wakaba 1.49 ## TODO:
4323     ## {applet} FEATURE_M12N10_REC_DEPRECATED
4324     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4325    
## |object|: zero or more |param| children first, then flow content
## (transparent).
## NOTE: param*, transparent (Flow)
##
## |$element_state->{has_non_param}| is set once anything other than
## a |param| has been seen; a |param| after that point is an error.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      archive => $HTMLSpaceURIsAttrChecker,
          ## TODO: Relative to @codebase
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      classid => $HTMLURIAttrChecker,
      codebase => $HTMLURIAttrChecker,
      codetype => $HTMLIMTAttrChecker,
          ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
      data => $HTMLURIAttrChecker,
      declare => $GetHTMLBooleanAttrChecker->('declare'),
          ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
          ## [HTML4] but we don't know how to test this.
      form => $HTMLFormAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name => $HTMLBrowsingContextNameAttrChecker,
          ## NOTE: |name| attribute of the |object| element defines
          ## the name of the browsing context created by the element,
          ## if any, but is also used as the form control name of the
          ## form control provided by the plugin, if any.
      standby => sub {}, ## NOTE: %Text; in HTML4
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      'content-length' => FEATURE_XHTML2_ED,
      data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      form => FEATURE_HTML5_DEFAULT,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## At least one of |data| and |type| has to be specified.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type',
                           level => $self->{level}->{must});
      }
    }

    ## Record how the URI-valued attributes of this element are used.
    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      ## The disallowed element still counts as non-|param| content.
      ## (This used to set |has_non_legend|, a flag that belongs to
      ## |figure| and is never read for |object|, so a |param|
      ## following such an element was not reported.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; no effect on the |param| state.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| is only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
}; # object
4443 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4444     ## What about |<section><object data><style scoped></style>x</object></section>|?
4445     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4446 wakaba 1.1
## |param|: empty element; both |name| and |value| are required.
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checkers = {
      name => sub { },  ## Any value is accepted.
      type => $HTMLIMTAttrChecker,
      value => sub { }, ## Any value is accepted.
      valuetype => $GetHTMLEnumeratedAttrChecker->({
        data => 1, ref => 1, object => 1,
      }),
    };
    my $attr_status = {
      %HTMLAttrStatus,
      href => FEATURE_RDFA_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    };
    $GetHTMLAttrsChecker->($attr_checkers, $attr_status)->(@_);

    ## Report each required attribute that is absent, in this order.
    for my $required_name (qw(name value)) {
      next if $item->{node}->has_attribute_ns (undef, $required_name);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $required_name,
                         level => $self->{level}->{must});
    }
  },
}; # param
4482    
## |video| element checker.
##
## Content model: transparent, but |source| children are accepted
## only while |$element_state->{allow_source}| is still set, i.e. when
## the element has no |src| attribute and no other significant content
## has been seen yet.
##
## |$element_state->{has_source}| is a three-state flag:
##   -1 ... no |src| attribute and no |source| child seen so far
##          (reported as 'child element missing' at the end);
##    1 ... a |source| child has been seen;
##   any other false value ... a |src| attribute is present.
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    poster => $HTMLURIAttrChecker,
    height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
  }, {
    %HTMLAttrStatus,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    end => FEATURE_HTML5_AT_RISK,
    height => FEATURE_HTML5_LC,
    loopend => FEATURE_HTML5_AT_RISK,
    loopstart => FEATURE_HTML5_AT_RISK,
    playcount => FEATURE_HTML5_AT_RISK,
    poster => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    start => FEATURE_HTML5_AT_RISK,
    width => FEATURE_HTML5_LC,
  }),

  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |source| children are allowed only in the absence of |src|.
    $element_state->{allow_source}
        = !$item->{node}->has_attribute_ns (undef, 'src');

    ## Keep an already-true value (NOTE: it might be set true by
    ## |check_element|); otherwise record -1 when a |source| child is
    ## still owed.
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{src}->{type}->{embedded} = 1;
    $uri_info->{poster}->{type}->{embedded} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded interactive content.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included element: nothing to check here.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
      ## A |source| after |src| or after other content is an error,
      ## but it still counts as "a source has been seen".
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:flow',
                         level => $self->{level}->{must})
          unless $element_state->{allow_source};
      $element_state->{has_source} = 1;
    } else {
      ## Any other child closes the window for |source| children and
      ## is checked as flow content.
      delete $element_state->{allow_source};
      $HTMLFlowContentChecker{check_child_element}->(@_);
    }
  },

  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Significant text also closes the window for |source| children.
    delete $element_state->{allow_source} if $has_significant;

    $HTMLFlowContentChecker{check_child_text}->(@_);
  },

  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Neither |src| nor any |source| child was provided.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'source',
                       level => $self->{level}->{must})
        if $element_state->{has_source} == -1;

    ## The remaining end-of-element handling is shared with |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
4564    
4565     $Element->{$HTML_NS}->{audio} = {
4566 wakaba 1.40 %{$Element->{$HTML_NS}->{video}},
4567 wakaba 1.48 status => FEATURE_HTML5_LC,
4568 wakaba 1.42 check_attrs => $GetHTMLAttrsChecker->({
4569     src => $HTMLURIAttrChecker,
4570     ## TODO: start, loopstart, loopend, end
4571     ## ISSUE: they MUST be "value time offset"s. Value?
4572     ## ISSUE: playcount has no conformance creteria
4573     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
4574     controls => $GetHTMLBooleanAttrChecker->('controls'),
4575 wakaba 1.50 }, {
4576     %HTMLAttrStatus,
4577     autoplay => FEATURE_HTML5_LC,
4578     controls => FEATURE_HTML5_LC,
4579 wakaba 1.153 end => FEATURE_HTML5_AT_RISK,
4580     loopend => FEATURE_HTML5_AT_RISK,
4581     loopstart => FEATURE_HTML5_AT_RISK,
4582     playcount => FEATURE_HTML5_AT_RISK,
4583 wakaba 1.50 src => FEATURE_HTML5_LC,
4584 wakaba 1.153 start => FEATURE_HTML5_AT_RISK,
4585 wakaba 1.42 }),
4586 wakaba 1.1 };
4587    
## |source| element checker.
##
## The element is empty (|%HTMLEmptyChecker|) and its |src| attribute
## is required.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $value_checkers = {
      media => $HTMLMQAttrChecker,
      ## ISSUE: Negative or zero pixelratio=""
      pixelratio => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
    };
    my $feature_status = {
      %HTMLAttrStatus,
      media => FEATURE_HTML5_LC,
      pixelratio => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_LC,
      type => FEATURE_HTML5_LC,
    };
    $GetHTMLAttrsChecker->($value_checkers, $feature_status)->(@_);

    ## |src| is mandatory.
    $self->{onerror}->(node => $item->{node},
                       type => 'attribute missing',
                       text => 'src',
                       level => $self->{level}->{must})
        unless $item->{node}->has_attribute_ns (undef, 'src');

    ## The |src| URL refers to an embedded resource.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4615    
## |canvas| element checker: transparent content model; |height| and
## |width| are non-negative integers with no further restriction on
## the value (the range callback always accepts).
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_COMPLETE,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      map {
        ($_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }))
      } qw/height width/
    },
    {
      %HTMLAttrStatus,
      map { ($_ => FEATURE_HTML5_COMPLETE) } qw/height width/
    },
  ),
};
4628    
## |map| element checker.
##
## Flow content model.  The |name| attribute is required and must not
## be empty; if an |id| attribute is also present, it must have the
## same value as |name|.  While a |map| element is open, a fresh hash
## is pushed onto |$self->{flag}->{in_map}| so that descendant |area|
## elements can record their |alt| / |href| combinations in it.
$Element->{$HTML_NS}->{map} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,

  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Set by the |name| checker to |[value, attribute node]|.
    my $name_info;

    $GetHTMLAttrsChecker->({
      name => sub {
        my ($self, $attr) = @_;
        my $name = $attr->value;
        ## NOTE: Duplication is not non-conforming.
        ## NOTE: Space characters are not non-conforming.
        unless (length $name) {
          $self->{onerror}->(node => $attr,
                             type => 'empty attribute value',
                             level => $self->{level}->{must});
        }
        ## The first |map| with a given name wins.
        $self->{map}->{$name} ||= $attr;
        $name_info = [$name, $attr];
      },
    }, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    })->(@_);

    unless ($name_info) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'name',
                         level => $self->{level}->{must});
      return;
    }

    ## |id|, when present, must agree with |name|.
    my $id = $item->{node}->get_attribute_ns (undef, 'id');
    if (defined $id and $name_info->[0] ne $id) {
      $self->{onerror}
          ->(node => $item->{node}->get_attribute_node_ns (undef, 'id'),
             type => 'id ne name',
             level => $self->{level}->{must});
    }
  },

  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Remember the outer value so that |check_end| can restore it.
    $element_state->{in_map_original} = $self->{flag}->{in_map};
    $self->{flag}->{in_map} = [@{$self->{flag}->{in_map} or []}, {}];
        ## NOTE: |{in_map}| is a reference to the array which contains
        ## hash references.  Hashes are corresponding to the opening
        ## |map| elements and each of them contains the key-value
        ## pairs corresponding to the absolute URLs for the processed
        ## |area| elements in the |map| element corresponding to the
        ## hash.  The key represents the resource (## TODO: use
        ## absolute URL), while the value represents whether there is
        ## an |area| element whose |alt| attribute is specified to a
        ## non-empty value.  If there IS such an |area| element for
        ## the resource specified by the key, then the value is set to
        ## zero (|0|).  Otherwise, if there is no such an |area|
        ## element but there is any |area| element with the empty
        ## |alt=""| attribute, then the value contains an array
        ## reference that contains all of such |area| elements.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },

  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Report every empty |alt=""| whose resource never received a
    ## non-empty |alt| within this |map| (value still an array ref).
    for my $url (keys %{$self->{flag}->{in_map}->[-1]}) {
      my $empty_alts = $self->{flag}->{in_map}->[-1]->{$url};
      next unless $empty_alts;
      for my $alt_attr (@$empty_alts) {
        $self->{onerror}->(type => 'empty area alt',
                           node => $alt_attr,
                           level => $self->{level}->{html5_no_may});
      }
    }

    ## Leave this |map|: restore the enclosing state.
    $self->{flag}->{in_map} = $element_state->{in_map_original};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
4727    
## |area| element checker.
##
## The element is empty (|%HTMLEmptyChecker|).  |check_attrs| walks
## the attributes itself (instead of using |$GetHTMLAttrsChecker|)
## because several checks depend on combinations of attributes:
##
##   - with |href|: |alt| is required, and the |alt| / |href| pair is
##     recorded in every open |map| (see |$self->{flag}->{in_map}|);
##   - without |href|: |target|, |ping|, |rel|, |media|, |hreflang|,
##     |type| and |alt| are not allowed;
##   - |coords| must match the number and range of values expected
##     for the (canonicalized) |shape|.
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,

  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Attribute nodes of the locally known attributes, by local name.
    my %known;
    ## Parsed |coords| values; left undefined on a syntax error.
    my $coord_list;

    for my $attr_node (@{$item->{node}->attributes}) {
      my $attr_ns = $attr_node->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr_node->manakai_local_name;

      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $status = {
          %HTMLAttrStatus,
          %HTMLM12NCommonAttrStatus,
          accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
          alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
          hreflang => FEATURE_HTML5_WD,
          lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
          media => FEATURE_HTML5_WD,
          nohref => FEATURE_M12N10_REC,
          onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          ping => FEATURE_HTML5_WD,
          rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
          shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
          type => FEATURE_HTML5_WD,
        }->{$attr_ln};

        $checker = {
          alt => sub {
            ## NOTE: Checked later.
          },
          shape => $GetHTMLEnumeratedAttrChecker->({
            circ => -1, circle => 1,
            default => 1,
            poly => 1, polygon => -1,
            rect => 1, rectangle => -1,
          }),
          coords => sub {
            my ($self, $attr) = @_;
            my $value = $attr->value;
            ## A comma-separated list of (optionally negative)
            ## integers, with no white space.
            if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
              $coord_list = [split /,/, $value];
            } else {
              $self->{onerror}->(node => $attr,
                                 type => 'coords:syntax error',
                                 level => $self->{level}->{must});
            }
          },
          nohref => $GetHTMLBooleanAttrChecker->('nohref'),
          target => $HTMLTargetAttrChecker,
          href => $HTMLURIAttrChecker,
          ping => $HTMLSpaceURIsAttrChecker,
          rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
          media => $HTMLMQAttrChecker,
          hreflang => $HTMLLanguageTagAttrChecker,
          type => $HTMLIMTAttrChecker,
        }->{$attr_ln};

        if ($checker) {
          ## Remember the node for the combination checks below.
          $known{$attr_ln} = $attr_node;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the namespace-wide tables.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr_node, $item, $element_state)
            if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## No checker and no status for a null-namespace attribute:
        ## nothing is reported here.
      } else {
        $self->{onerror}->(node => $attr_node,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in
        ## the spec
      }

      $self->_attr_status_info ($attr_node, $status);
    }

    if (defined $known{href}) {
      $self->{has_hyperlink_element} = 1;
      if (defined $known{alt}) {
        my $url = $known{href}->value; ## TODO: resolve
        my $open_maps = $self->{flag}->{in_map} or [];
        if (length $known{alt}->value) {
          ## A non-empty |alt| satisfies the resource in every open
          ## |map|.
          $_->{$url} = 0 for @{$self->{flag}->{in_map} or []};
        } else {
          ## NOTE: Empty |alt=""|.  If there is another |area| element
          ## with the same |href=""| and that |area| element's
          ## |alt=""| attribute is not an empty string, then this
          ## is conforming.
          for my $map_state (@{$self->{flag}->{in_map} or []}) {
            ## Already satisfied by a non-empty |alt|.
            next if exists $map_state->{$url} and not $map_state->{$url};
            push @{$map_state->{$url} ||= []}, $known{alt};
          }
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => 'alt',
                           level => $self->{level}->{must});
      }
    } else {
      ## Hyperlink-related attributes make no sense without |href|.
      for my $name (qw/target ping rel media hreflang type alt/) {
        next unless defined $known{$name};
        $self->{onerror}->(node => $known{$name},
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## Canonical shape keyword; missing or unrecognized values are
    ## treated as 'rectangle'.
    my %canonical_shape = (
      circ => 'circle', circle => 'circle',
      default => 'default',
      poly => 'polygon', polygon => 'polygon',
      rect => 'rectangle', rectangle => 'rectangle',
    );
    my $shape = 'rectangle';
    if (defined $known{shape}) {
      $shape = $canonical_shape{lc $known{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    ## |$shape| is one of 'circle', 'default', 'polygon' and
    ## 'rectangle' here.
    if ($shape eq 'default') {
      if (defined $known{coords}) {
        $self->{onerror}->(node => $known{coords},
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    } elsif (not defined $known{coords}) {
      ## Every other shape requires |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'coords',
                         level => $self->{level}->{must});
    } elsif (not defined $coord_list) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      ## x, y, radius; the radius must not be negative.
      if (@$coord_list != 3) {
        $self->{onerror}->(node => $known{coords},
                           type => 'coords:number not 3',
                           text => 0+@$coord_list,
                           level => $self->{level}->{must});
      } elsif ($coord_list->[2] < 0) {
        $self->{onerror}->(node => $known{coords},
                           type => 'coords:out of range',
                           index => 2,
                           value => $coord_list->[2],
                           level => $self->{level}->{must});
      }
    } elsif ($shape eq 'polygon') {
      ## At least three x,y pairs.
      if (@$coord_list < 6) {
        $self->{onerror}->(node => $known{coords},
                           type => 'coords:number lt 6',
                           text => 0+@$coord_list,
                           level => $self->{level}->{must});
      } elsif (@$coord_list % 2 != 0) {
        $self->{onerror}->(node => $known{coords},
                           type => 'coords:number not even',
                           text => 0+@$coord_list,
                           level => $self->{level}->{must});
      }
    } else { # rectangle
      ## x1, y1, x2, y2 with x1 < x2 and y1 < y2.
      if (@$coord_list != 4) {
        $self->{onerror}->(node => $known{coords},
                           type => 'coords:number not 4',
                           text => 0+@$coord_list,
                           level => $self->{level}->{must});
      } else {
        for my $i (0, 1) {
          next if $coord_list->[$i] < $coord_list->[$i + 2];
          $self->{onerror}->(node => $known{coords},
                             type => 'coords:out of range',
                             index => $i,
                             value => $coord_list->[$i],
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },

  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## An |area| with a parent element must be inside some |map|.
    if (not $self->{flag}->{in_map} and
        $item->{node}->manakai_parent_element) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element not allowed:area',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
4975    
## |table| element checker.
##
## Children are validated with a small state machine kept in
## |$element_state->{phase}|:
##
##   'before caption' ... nothing seen yet
##   'in colgroup'    ... |caption| and/or |colgroup|s seen
##   'after thead'    ... |thead| and/or a leading |tfoot| seen; either
##                        |tbody|s or |tr|s may follow
##   'in tbodys'      ... one or more |tbody| seen
##   'in trs'         ... one or more |tr| seen
##   'after tfoot'    ... trailing |tfoot| seen; nothing may follow
##
## |$element_state->{has_tfoot}| records that a |tfoot| has been seen,
## as at most one |tfoot| child is allowed in total.
##
## At the end of the element, the table model is built by
## |Whatpm::HTMLTable| and pushed onto |$self->{return}->{table}|.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cellpadding => $HTMLLengthAttrChecker,
    cellspacing => $HTMLLengthAttrChecker,
    frame => $GetHTMLEnumeratedAttrChecker->({
      void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
      lhs => 1, rhs => 1, box => 1, border => 1,
    }),
    rules => $GetHTMLEnumeratedAttrChecker->({
      none => 1, groups => 1, rows => 1, cols => 1, all => 1,
    }),
    summary => sub {}, ## NOTE: %Text; in HTML4.
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    cols => FEATURE_RFC1942,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';

    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{phase} eq 'in tbodys') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        #$element_state->{phase} = 'in tbodys';
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        ## Trailing |tfoot|.
        $element_state->{phase} = 'after tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'in trs') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        #$element_state->{phase} = 'in trs';
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        ## Trailing |tfoot|.
        $element_state->{phase} = 'after tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'after thead') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        ## Leading |tfoot|.  The phase is intentionally left as
        ## 'after thead': the row groups that follow may be either
        ## |tbody|s or |tr|s, so committing to 'in tbodys' here would
        ## wrongly reject |<table><tfoot/><tr/></table>|.  A second
        ## |tfoot| is rejected by the |has_tfoot| guard above.
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'in colgroup') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
        $element_state->{phase} = 'after thead';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        ## Leading |tfoot|: |tbody|s or |tr|s may still follow.
        $element_state->{phase} = 'after thead';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'before caption') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
        $element_state->{phase} = 'after thead';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        ## Leading |tfoot|: |tbody|s or |tr|s may still follow.
        $element_state->{phase} = 'after thead';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'after tfoot') {
      ## Nothing is allowed after the trailing |tfoot|.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    } else {
      die "check_child_element: Bad |table| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only insignificant (white space) text is allowed as a child.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    my $table = Whatpm::HTMLTable->form_table ($item->{node}, sub {
      $self->{onerror}->(@_);
    }, $self->{level});
    Whatpm::HTMLTable->assign_header
        ($table, $self->{onerror}, $self->{level});
    push @{$self->{return}->{table}}, $table;

    $HTMLChecker{check_end}->(@_);
  },
};
5125    
## |caption| element checker: flow content, except that |table| is
## registered as an excluded ("minus") element for the duration of
## the element.
$Element->{$HTML_NS}->{caption} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        top => 1, bottom => 1, left => 1, right => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Exclude |table| while inside the caption.
    my $excluded = {$HTML_NS => {table => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusion registered by |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # caption
5152 wakaba 1.1
## Attribute checkers shared by the table-related elements that take
## the HTML4 cell alignment attributes.
my %cellalign = (
  ## HTML4 %cellhalign;
  align => $GetHTMLEnumeratedAttrChecker->({
    left => 1, center => 1, right => 1, justify => 1, char => 1,
  }),
  char => sub {
    my ($self, $attr) = @_;

    ## NOTE: "character" or |%Character;| in HTML4.

    ## The value has to be exactly one character long.
    return if length ($attr->value) == 1;
    $self->{onerror}->(node => $attr, type => 'char:syntax error',
                       level => $self->{level}->{html4_fact});
  },
  charoff => $HTMLLengthAttrChecker,

  ## HTML4 %cellvalign;
  valign => $GetHTMLEnumeratedAttrChecker->({
    top => 1, middle => 1, bottom => 1, baseline => 1,
  }),
);
5176    
## |colgroup| element checker.
##
## Starts from |%HTMLEmptyChecker| but overrides the child checks so
## that |col| element children are accepted; any other element child
## and any significant text is an error.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      %cellalign,
      ## |span| must be greater than zero.
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded interactive content.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included element.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
      ## The only regular child allowed.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    return unless $has_significant;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
5223    
## |col| element: an empty element whose |span| attribute, when present,
## is checked as an integer greater than zero.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      %cellalign,
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align   => FEATURE_M12N10_REC,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      span    => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valign  => FEATURE_M12N10_REC,
      width   => FEATURE_M12N10_REC,
    }
  ),
};
5242    
## |tbody| element.  Apart from elements registered in |plus_elements|,
## the only child element accepted is an HTML |tr|; significant text
## content is reported as an error.  This definition is also copied for
## |thead| and |tfoot|.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      %cellalign,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align   => FEATURE_M12N10_REC,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      valign  => FEATURE_M12N10_REC,
    }
  ),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $element_state) = @_;

    ## An excluded interactive element is always an error.
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }

    ## Explicitly included elements and |tr| are fine.
    return if $self->{plus_elements}->{$ns}->{$ln};
    return if $ns eq $HTML_NS and $ln eq 'tr';

    $self->{onerror}->(node => $child, type => 'element not allowed',
                       level => $self->{level}->{must});
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $element_state) = @_;
    return unless $is_significant;
    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
5282    
## |thead| is checked exactly like |tbody|: its definition is a shallow
## copy of the |tbody| definition hash.
$Element->{$HTML_NS}->{thead} = do {
  my $tbody_def = $Element->{$HTML_NS}->{tbody};
  +{ %$tbody_def };
};
5286    
## |tfoot| is checked exactly like |tbody|: its definition is a shallow
## copy of the |tbody| definition hash.
$Element->{$HTML_NS}->{tfoot} = do {
  my $tbody_def = $Element->{$HTML_NS}->{tbody};
  +{ %$tbody_def };
};
5290    
## |tr| element.  Apart from elements registered in |plus_elements|, the
## only child elements accepted are HTML |td| and |th|; significant text
## content is reported as an error.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      %cellalign,
      bgcolor => $HTMLColorAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align   => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      valign  => FEATURE_M12N10_REC,
    }
  ),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $element_state) = @_;

    ## An excluded interactive element is always an error.
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }

    ## Explicitly included elements and table cells are fine.
    return if $self->{plus_elements}->{$ns}->{$ln};
    return if $ns eq $HTML_NS and ($ln eq 'td' or $ln eq 'th');

    $self->{onerror}->(node => $child, type => 'element not allowed',
                       level => $self->{level}->{must});
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $element_state) = @_;
    return unless $is_significant;
    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
5333    
## |td| element: content is checked by the flow content checker; only
## the attribute checkers and attribute statuses are specific to |td|.
$Element->{$HTML_NS}->{td} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      %cellalign,
      abbr => sub { }, ## NOTE: HTML4 %Text; and SHOULD be short.
      axis => sub { }, ## NOTE: HTML4 "cdata", comma-separated
      bgcolor => $HTMLColorAttrChecker,
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      headers => sub {
        ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
        ## Though that method does not check the |headers| attribute
        ## of a |td| element if the element does not form a table, in
        ## that case the |td| element is non-conforming anyway.
      },
      nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->(
        +{ map { $_ => 1 } qw(row col rowgroup colgroup) }
      ),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      abbr    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      align   => FEATURE_M12N10_REC,
      axis    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      headers => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      height  => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      nowrap  => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      scope   => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valign  => FEATURE_M12N10_REC,
      width   => FEATURE_M12N10_REC_DEPRECATED,
    }
  ),
};
5373    
## |th| element: content is checked by the phrasing content checker;
## only the attribute checkers and attribute statuses are specific to
## |th|.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      %cellalign,
      abbr => sub { }, ## NOTE: HTML4 %Text; and SHOULD be short.
      axis => sub { }, ## NOTE: HTML4 "cdata", comma-separated
      bgcolor => $HTMLColorAttrChecker,
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## TODO: HTML4(?) |headers|
      nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->(
        +{ map { $_ => 1 } qw(row col rowgroup colgroup) }
      ),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      abbr    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      align   => FEATURE_M12N10_REC,
      axis    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      headers => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      height  => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      nowrap  => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      scope   => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valign  => FEATURE_M12N10_REC,
      width   => FEATURE_M12N10_REC_DEPRECATED,
    }
  ),
};
5408    
## |form| element: flow content in which a nested |form| is excluded for
## the duration of the element (added in |check_start|, removed in
## |check_end|).  Form names must be non-empty and unique; names already
## seen are recorded in |$self->{form}|.
$Element->{$HTML_NS}->{form} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      accept => $AcceptAttrChecker,
      'accept-charset' => $HTMLCharsetsAttrChecker,
      action => $HTMLURIAttrChecker, ## TODO: Warn if submission is not defined for the scheme
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      enctype => $GetHTMLEnumeratedAttrChecker->(
        +{
          map { $_ => 1 }
              'application/x-www-form-urlencoded',
              'multipart/form-data',
              'text/plain'
        }
      ),
      method => $GetHTMLEnumeratedAttrChecker->(
        +{ map { $_ => 1 } qw(get post put delete) }
      ),
      name => sub {
        my ($self, $attr) = @_;

        my $form_name = $attr->value;
        if ($form_name eq '') {
          $self->{onerror}->(type => 'empty form name',
                             node => $attr,
                             level => $self->{level}->{must});
        } elsif ($self->{form}->{$form_name}) {
          $self->{onerror}->(type => 'duplicate form name',
                             node => $attr,
                             value => $form_name,
                             level => $self->{level}->{must});
        } else {
          ## First occurrence: remember it for later duplicate checks.
          $self->{form}->{$form_name} = 1;
        }
      },
      novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
      ## TODO: Tests for following attrs:
      onformchange => $HTMLEventHandlerAttrChecker,
      onforminput => $HTMLEventHandlerAttrChecker,
      onreceived => $HTMLEventHandlerAttrChecker,
      replace => $GetHTMLEnumeratedAttrChecker->(
        +{ map { $_ => 1 } qw(document values) }
      ),
      target => $HTMLTargetAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
      'accept-charset' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      data => FEATURE_WF2,
      enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      #name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      novalidate => FEATURE_HTML5_DEFAULT,
      onformchange => FEATURE_WF2_INFORMATIVE,
      onforminput => FEATURE_WF2_INFORMATIVE,
      onreceived => FEATURE_WF2,
      onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      replace => FEATURE_WF2,
      sdapref => FEATURE_HTML20_RFC,
      sdasuff => FEATURE_HTML20_RFC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    }
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## A |form| may not be nested inside this |form|.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {form => 1}});

    ## Classify the URI-valued attributes.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(data template ref);
    $element_state->{id_type} = 'form';
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusion installed by |check_start|, then run the
    ## generic flow content end check with the same arguments.
    $self->_remove_minus_elements ($element_state);
    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
5491    
## |fieldset| element.  Content model: |legend|, then flow content.
## |$element_state->{has_legend}| records that a |legend| child was seen
## and |$element_state->{has_non_legend}| that any other content (or a
## first |legend|) has already appeared, so that a late or repeated
## |legend| can be reported.
$Element->{$HTML_NS}->{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      form => $HTMLFormAttrChecker,
      name => $FormControlNameAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_DEFAULT,
    }
  ),
  ## NOTE: legend, Flow
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $element_state) = @_;

    ## An excluded interactive element is an error and counts as
    ## non-|legend| content.
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
      return;
    }

    ## Explicitly included elements do not affect the content model.
    return if $self->{plus_elements}->{$ns}->{$ln};

    if ($ns eq $HTML_NS and $ln eq 'legend') {
      ## A |legend| is only allowed before anything else.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:details legend',
                         level => $self->{level}->{must})
          if $element_state->{has_non_legend};
      $element_state->{has_legend} = 1;
      $element_state->{has_non_legend} = 1;
      return;
    }

    ## Everything else is checked as flow content.
    $HTMLFlowContentChecker{check_child_element}->(@_);
    $element_state->{has_non_legend} = 1 unless $is_transparent;
    ## TODO:
    ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
    ## should be an error, since |object| is allowed as flow,
    ## therefore |details| part of the content model does not match.
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $is_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A |legend| child is mandatory.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'legend',
                       level => $self->{level}->{must})
        unless $element_state->{has_legend};

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
  ## NOTE: This definition is partially reused by |details| element's
  ## checker.
};
5558    
5559     $Element->{$HTML_NS}->{input} = {
5560 wakaba 1.119 %HTMLEmptyChecker,
5561     status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5562 wakaba 1.140 check_attrs => sub {
5563     my ($self, $item, $element_state) = @_;
5564 wakaba 1.142
5565 wakaba 1.145 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5566 wakaba 1.142 $state = 'text' unless defined $state;
5567     $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5568    
5569 wakaba 1.140 for my $attr (@{$item->{node}->attributes}) {
5570     my $attr_ns = $attr->namespace_uri;
5571     $attr_ns = '' unless defined $attr_ns;
5572     my $attr_ln = $attr->manakai_local_name;
5573     my $checker;
5574     my $status;
5575     if ($attr_ns eq '') {
5576     $status =
5577     {
5578     %HTMLAttrStatus,
5579     %HTMLM12NCommonAttrStatus,
5580     accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5581     'accept-charset' => FEATURE_HTML2X_RFC,
5582 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
5583 wakaba 1.140 action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5584     align => FEATURE_M12N10_REC_DEPRECATED,
5585     alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5586     autocomplete => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5587     autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5588     checked => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5589     datafld => FEATURE_HTML4_REC_RESERVED,
5590     dataformatas => FEATURE_HTML4_REC_RESERVED,
5591     datasrc => FEATURE_HTML4_REC_RESERVED,
5592     disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5593     enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5594     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5595 wakaba 1.150 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5596     FEATURE_XHTMLBASIC11_CR,
5597 wakaba 1.140 ismap => FEATURE_M12N10_REC,
5598 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5599 wakaba 1.140 list => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5600     max => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5601 wakaba 1.150 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X |
5602     FEATURE_M12N10_REC,
5603 wakaba 1.140 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5604     min => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5605 wakaba 1.156 multiple => FEATURE_HTML5_DEFAULT,
5606 wakaba 1.140 name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5607 wakaba 1.161 novalidate => FEATURE_HTML5_DEFAULT,
5608 wakaba 1.140 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5609     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5610     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5611     onformchange => FEATURE_WF2_INFORMATIVE,
5612     onforminput => FEATURE_WF2_INFORMATIVE,
5613     oninput => FEATURE_WF2,
5614     oninvalid => FEATURE_WF2,
5615     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5616     pattern => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5617 wakaba 1.156 placeholder => FEATURE_HTML5_DEFAULT,
5618 wakaba 1.140 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5619     replace => FEATURE_WF2,
5620     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5621     sdapref => FEATURE_HTML20_RFC,
5622 wakaba 1.154 size => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5623 wakaba 1.140 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5624     step => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5625     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5626     target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5627 wakaba 1.161 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO:dropped
5628 wakaba 1.140 type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5629     usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
5630     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5631     }->{$attr_ln};
5632    
5633     $checker =
5634     {
5635 wakaba 1.141 ## NOTE: Value of an empty string means that the attribute is only
5636     ## applicable for a specific set of states.
5637 wakaba 1.142 accept => '',
5638 wakaba 1.149 'accept-charset' => $HTMLCharsetsAttrChecker,
5639     ## NOTE: To which states it applies is not defined in RFC 2070.
5640 wakaba 1.142 action => '',
5641 wakaba 1.150 align => '',
5642 wakaba 1.141 alt => '',
5643 wakaba 1.142 autocomplete => '',
5644 wakaba 1.165 autofocus => $AutofocusAttrChecker,
5645     ## NOTE: <input type=hidden disabled> is not disallowed.
5646 wakaba 1.142 checked => '',
5647     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
5648 wakaba 1.165 ## NOTE: <input type=hidden disabled> is not disallowed.
5649 wakaba 1.142 enctype => '',
5650     form => $HTMLFormAttrChecker,
5651 wakaba 1.150 inputmode => '',
5652     ismap => '', ## NOTE: "MUST" be type=image [HTML4]
5653 wakaba 1.142 list => '',
5654     max => '',
5655     maxlength => '',
5656     method => '',
5657     min => '',
5658 wakaba 1.156 multiple => '',
5659 wakaba 1.165 name => $FormControlNameAttrChecker,
5660 wakaba 1.166 novalidate => '',
5661 wakaba 1.149 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
5662     onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
5663     oninput => $HTMLEventHandlerAttrChecker, # [WF2]
5664     oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
5665     ## TODO: tests for four attributes above
5666 wakaba 1.142 pattern => '',
5667 wakaba 1.156 placeholder => '',
5668 wakaba 1.142 readonly => '',
5669 wakaba 1.150 replace => '',
5670 wakaba 1.142 required => '',
5671     size => '',
5672     src => '',
5673     step => '',
5674     target => '',
5675 wakaba 1.140 type => $GetHTMLEnumeratedAttrChecker->({
5676 wakaba 1.156 hidden => 1, text => 1, search => 1, url => 1,
5677     email => 1, password => 1,
5678 wakaba 1.141 datetime => 1, date => 1, month => 1, week => 1, time => 1,
5679 wakaba 1.157 'datetime-local' => 1, number => 1, range => 1, color => 1,
5680     checkbox => 1,
5681 wakaba 1.141 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
5682     button => 1,
5683 wakaba 1.140 }),
5684 wakaba 1.151 usemap => '',
5685 wakaba 1.142 value => '',
5686 wakaba 1.140 }->{$attr_ln};
5687 wakaba 1.141
5688     ## State-dependent checkers
5689     unless ($checker) {
5690     if ($state eq 'hidden') {
5691     $checker =
5692     {
5693 wakaba 1.142 value => sub {
5694     my ($self, $attr, $item, $element_state) = @_;
5695 wakaba 1.145 my $name = $item->{node}->get_attribute_ns (undef, 'name');
5696 wakaba 1.142 if (defined $name and $name eq '_charset_') { ## case-sensitive
5697     $self->{onerror}->(node => $attr,
5698     type => '_charset_ value',
5699     level => $self->{level}->{must});
5700     }
5701     },
5702 wakaba 1.141 }->{$attr_ln} || $checker;
5703 wakaba 1.142 ## TODO: Warn if no name attribute?
5704     ## TODO: Warn if name!=_charset_ and no value attribute?
5705 wakaba 1.168 } elsif ({
5706     datetime => 1, date => 1, month => 1, time => 1,
5707     week => 1, 'datetime-local' => 1,
5708     }->{$state}) {
5709     my $v = {
5710     datetime => ['global_date_and_time_string'],
5711     date => ['date_string'],
5712     month => ['month_string'],
5713     week => ['week_string'],
5714     time => ['time_string'],
5715     'datetime-local' => ['local_date_and_time_string'],
5716     }->{$state};
5717 wakaba 1.144 $checker =
5718     {
5719 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5720     on => 1, off => 1,
5721     }),
5722 wakaba 1.158 list => $ListAttrChecker,
5723 wakaba 1.168 min => $GetDateTimeAttrChecker->($v->[0]),
5724     max => $GetDateTimeAttrChecker->($v->[0]),
5725 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5726 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5727 wakaba 1.148 step => $StepAttrChecker,
5728 wakaba 1.168 value => $GetDateTimeAttrChecker->($v->[0]),
5729 wakaba 1.144 }->{$attr_ln} || $checker;
5730     } elsif ($state eq 'number') {
5731     $checker =
5732     {
5733 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5734     on => 1, off => 1,
5735     }),
5736 wakaba 1.158 list => $ListAttrChecker,
5737 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5738     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5739 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5740 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5741 wakaba 1.148 step => $StepAttrChecker,
5742 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5743 wakaba 1.144 }->{$attr_ln} || $checker;
5744     } elsif ($state eq 'range') {
5745     $checker =
5746     {
5747 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5748     on => 1, off => 1,
5749     }),
5750 wakaba 1.158 list => $ListAttrChecker,
5751 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5752     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5753 wakaba 1.148 step => $StepAttrChecker,
5754 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5755 wakaba 1.144 }->{$attr_ln} || $checker;
5756 wakaba 1.157 } elsif ($state eq 'color') {
5757     $checker =
5758     {
5759     autocomplete => $GetHTMLEnumeratedAttrChecker->({
5760     on => 1, off => 1,
5761     }),
5762 wakaba 1.158 list => $ListAttrChecker,
5763 wakaba 1.157 value => sub {
5764     my ($self, $attr) = @_;
5765     unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
5766     $self->{onerror}->(node => $attr,
5767     type => 'scolor:syntax error', ## TODOC: type
5768     level => $self->{level}->{must});
5769     }
5770     },
5771     }->{$attr_ln} || $checker;
5772 wakaba 1.144 } elsif ($state eq 'checkbox' or $state eq 'radio') {
5773     $checker =
5774     {
5775 wakaba 1.149 checked => $GetHTMLBooleanAttrChecker->('checked'),
5776     ## TODO: tests
5777 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5778 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5779     }->{$attr_ln} || $checker;
5780     ## TODO: There MUST be another input type=radio with same
5781     ## name (Radio state).
5782     ## ISSUE: There should be exactly one type=radio with checked?
5783     } elsif ($state eq 'file') {
5784     $checker =
5785     {
5786 wakaba 1.161 accept => $AcceptAttrChecker,
5787 wakaba 1.168 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
5788 wakaba 1.159 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
5789 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5790 wakaba 1.144 }->{$attr_ln} || $checker;
5791     } elsif ($state eq 'submit') {
5792     $checker =
5793     {
5794 wakaba 1.149 action => $HTMLURIAttrChecker,
5795 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5796     'application/x-www-form-urlencoded' => 1,
5797     'multipart/form-data' => 1,
5798     'text/plain' => 1,
5799     }),
5800 wakaba 1.149 method => $GetHTMLEnumeratedAttrChecker->({
5801     get => 1, post => 1, put => 1, delete => 1,
5802     }),
5803 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5804 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5805     document => 1, values => 1,
5806     }),
5807     target => $HTMLTargetAttrChecker,
5808 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5809     }->{$attr_ln} || $checker;
5810     } elsif ($state eq 'image') {
5811     $checker =
5812     {
5813 wakaba 1.149 action => $HTMLURIAttrChecker,
5814     align => $GetHTMLEnumeratedAttrChecker->({
5815     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
5816     }),
5817 wakaba 1.144 alt => sub {
5818     my ($self, $attr) = @_;
5819     my $value = $attr->value;
5820     unless (length $value) {
5821     $self->{onerror}->(node => $attr,
5822     type => 'empty anchor image alt',
5823     level => $self->{level}->{must});
5824     }
5825     },
5826 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5827     'application/x-www-form-urlencoded' => 1,
5828     'multipart/form-data' => 1,
5829     'text/plain' => 1,
5830     }),
5831 wakaba 1.149 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
5832     method => $GetHTMLEnumeratedAttrChecker->({
5833     get => 1, post => 1, put => 1, delete => 1,
5834     }),
5835 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5836 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5837     document => 1, values => 1,
5838     }),
5839 wakaba 1.144 src => $HTMLURIAttrChecker,
5840     ## TODO: There is requirements on the referenced resource.
5841 wakaba 1.149 target => $HTMLTargetAttrChecker,
5842     usemap => $HTMLUsemapAttrChecker,
5843 wakaba 1.144 }->{$attr_ln} || $checker;
5844     ## TODO: alt & src are required.
5845     } elsif ({
5846     reset => 1, button => 1,
5847     ## NOTE: From Web Forms 2.0:
5848     remove => 1, 'move-up' => 1, 'move-down' => 1,
5849     add => 1,
5850     }->{$state}) {
5851     $checker =
5852     {
5853     ## NOTE: According to Web Forms 2.0, |input| attribute
5854     ## has |template| attribute to support the |add| button
5855     ## type (as part of the repetition template feature). It
5856     ## conflicts with the |template| global attribute
5857     ## introduced as part of the data template feature.
5858     ## NOTE: |template| attribute as defined in Web Forms 2.0
5859     ## has no author requirement.
5860     value => sub { }, ## NOTE: No restriction.
5861     }->{$attr_ln} || $checker;
5862 wakaba 1.156 } else { # Text, Search, E-mail, URL, Password
5863 wakaba 1.141 $checker =
5864     {
5865 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5866     on => 1, off => 1,
5867     }),
5868 wakaba 1.149 ## TODO: inputmode [WF2]
5869 wakaba 1.158 list => $ListAttrChecker,
5870 wakaba 1.147 maxlength => sub {
5871     my ($self, $attr, $item, $element_state) = @_;
5872    
5873     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
5874    
5875 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
5876 wakaba 1.147 ## NOTE: Applying the rules for parsing non-negative
5877     ## integers results in a number.
5878     my $max_allowed_value_length = 0+$1;
5879    
5880     my $value = $item->{node}->get_attribute_ns (undef, 'value');
5881     if (defined $value) {
5882     my $codepoint_length = length $value;
5883 wakaba 1.162
5884 wakaba 1.147 if ($codepoint_length > $max_allowed_value_length) {
5885     $self->{onerror}
5886     ->(node => $item->{node}
5887     ->get_attribute_node_ns (undef, 'value'),
5888     type => 'value too long',
5889     level => $self->{level}->{must});
5890     }
5891     }
5892     }
5893     },
5894 wakaba 1.160 pattern => $PatternAttrChecker,
5895 wakaba 1.159 placeholder => sub {
5896     my ($self, $attr) = @_;
5897     if ($attr->value =~ /[\x0D\x0A]/) {
5898     $self->{onerror}->(node => $attr,
5899     type => 'newline in value', ## TODOC: type
5900     level => $self->{level}->{must});
5901     }
5902     },
5903 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5904 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5905 wakaba 1.147 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
value => sub {
  ## Checker for the |value| attribute of an |input| element in the
  ## Text, Search, URL, E-mail, or Password state.  |$state| is the
  ## lowercased |type| attribute value taken from the enclosing scope.
  ##
  ##   - url:    the value is checked by |$HTMLURIAttrChecker|.
  ##   - email:  the value must match |$ValidEmailAddress|; with the
  ##             |multiple| attribute it is a comma-separated list
  ##             whose items are checked individually.
  ##   - others: the value must not contain CR or LF.
  my ($self, $attr, $item, $element_state) = @_;
  if ($state eq 'url') {
    $HTMLURIAttrChecker->(@_);
  } elsif ($state eq 'email') {
    if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
      ## The limit of -1 keeps trailing empty fields, so "a@b," gives
      ## an empty second address.  An empty value splits to no fields
      ## at all; treat it as a single empty address.
      my @addr = split /,/, $attr->value, -1;
      @addr = ('') unless @addr;
      for my $addr (@addr) {
        ## Strip ALL leading and trailing space characters.  The
        ## trailing pattern previously lacked the |+| quantifier and
        ## removed only a single character.
        $addr =~ s/\A[\x09\x0A\x0C\x0D\x20]+//;
        $addr =~ s/[\x09\x0A\x0C\x0D\x20]+\z//;
        unless ($addr =~ /\A$ValidEmailAddress\z/) {
          $self->{onerror}->(node => $attr,
                             type => 'email:syntax error', ## TODO: type
                             value => $addr,
                             level => $self->{level}->{must});
        }
      }
    } else {
      unless ($attr->value =~ /\A$ValidEmailAddress\z/) {
        $self->{onerror}->(node => $attr,
                           type => 'email:syntax error', ## TODO: type
                           level => $self->{level}->{must});
      }
    }
  } else {
    if ($attr->value =~ /[\x0D\x0A]/) {
      $self->{onerror}->(node => $attr,
                         type => 'newline in value', ## TODO: type
                         level => $self->{level}->{must});
    }
  }
},
5939 wakaba 1.141 }->{$attr_ln} || $checker;
5940 wakaba 1.147 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
5941 wakaba 1.156 $checker = $GetHTMLBooleanAttrChecker->('multiple')
5942     if $state eq 'email' and $attr_ln eq 'multiple';
5943 wakaba 1.161
5944     if ($item->{node}->has_attribute_ns (undef, 'pattern') and
5945     not $item->{node}->has_attribute_ns (undef, 'title')) {
5946     $self->{onerror}->(node => $item->{node},
5947     type => 'attribute missing',
5948     text => 'title',
5949     level => $self->{level}->{should});
5950     }
5951 wakaba 1.141 }
5952     }
5953    
5954     if (defined $checker) {
5955     if ($checker eq '') {
5956     $checker = sub {
5957     my ($self, $attr) = @_;
5958     $self->{onerror}->(node => $attr,
5959     type => 'input attr not applicable',
5960     text => $state,
5961     level => $self->{level}->{must});
5962     };
5963     }
5964 wakaba 1.140 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
5965     $attr_ln !~ /[A-Z]/) {
5966     $checker = $HTMLDatasetAttrChecker;
5967     $status = $HTMLDatasetAttrStatus;
5968     } else {
5969     $checker = $HTMLAttrChecker->{$attr_ln};
5970     }
5971     }
5972     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
5973     || $AttrChecker->{$attr_ns}->{''};
5974     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
5975     || $AttrStatus->{$attr_ns}->{''};
5976     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
5977 wakaba 1.157
5978 wakaba 1.140 if ($checker) {
5979     $checker->($self, $attr, $item, $element_state) if ref $checker;
5980     } elsif ($attr_ns eq '' and not $status) {
5981     #
5982     } else {
5983     $self->{onerror}->(node => $attr,
5984     type => 'unknown attribute',
5985     level => $self->{level}->{uncertain});
5986     ## ISSUE: No conformance criteria for unknown attributes in the spec
5987     }
5988    
5989     $self->_attr_status_info ($attr, $status);
5990     }
5991 wakaba 1.168
5992     ## ISSUE: -0/+0
5993    
5994     if ($state eq 'range') {
5995     $element_state->{number_value}->{min} ||= 0;
5996     $element_state->{number_value}->{max} = 100
5997     unless defined $element_state->{number_value}->{max};
5998     }
5999    
6000     if (defined $element_state->{date_value}->{min} or
6001     defined $element_state->{date_value}->{max}) {
6002     my $min_value = $element_state->{date_value}->{min};
6003     my $max_value = $element_state->{date_value}->{max};
6004     my $value_value = $element_state->{date_value}->{value};
6005    
6006     if (defined $min_value and $min_value eq '' and
6007     (defined $max_value or defined $value_value)) {
6008     my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6009     $self->{onerror}->(node => $min,
6010     type => 'date value not supported', ## TODOC: type
6011     value => $min->value,
6012     level => $self->{level}->{unsupported});
6013     undef $min_value;
6014     }
6015     if (defined $max_value and $max_value eq '' and
6016     (defined $min_value or defined $value_value)) {
         ## An empty string |$max_value| is reported as an unsupported
         ## date value (presumably "parsed but not supported" - confirm
         ## against the code that fills |date_value|).  It is reported only
         ## when there is another value (|min| or |value|) to compare
         ## with, in the same way as the checks for |min| and |value|.
         ## (The guard used to test |$max_value| itself, which is always
         ## defined here.)
6017     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6018     $self->{onerror}->(node => $max,
6019     type => 'date value not supported', ## TODOC: type
6020     value => $max->value,
6021     level => $self->{level}->{unsupported});
         ## Exclude the unsupported value from the comparisons below.
6022     undef $max_value;
6023     }
6024     if (defined $value_value and $value_value eq '' and
6025     (defined $max_value or defined $min_value)) {
6026     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6027     $self->{onerror}->(node => $value,
6028     type => 'date value not supported', ## TODOC: type
6029     value => $value->value,
6030     level => $self->{level}->{unsupported});
6031     undef $value_value;
6032     }
6033    
6034     if (defined $min_value and defined $max_value) {
6035     if ($min_value->to_html5_number > $max_value->to_html5_number) {
6036     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6037     $self->{onerror}->(node => $max,
6038     type => 'max lt min', ## TODOC: type
6039     level => $self->{level}->{must});
6040     }
6041     }
6042    
6043     if (defined $min_value and defined $value_value) {
6044     if ($min_value->to_html5_number > $value_value->to_html5_number) {
6045     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6046     $self->{onerror}->(node => $value,
6047     type => 'value lt min', ## TODOC: type
6048     level => $self->{level}->{warn});
6049     ## NOTE: Not an error.
6050     }
6051     }
6052    
6053     if (defined $max_value and defined $value_value) {
6054     if ($max_value->to_html5_number < $value_value->to_html5_number) {
6055     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6056     $self->{onerror}->(node => $value,
6057     type => 'value gt max', ## TODOC: type
6058     level => $self->{level}->{warn});
6059     ## NOTE: Not an error.
6060     }
6061     }
6062     } elsif (defined $element_state->{number_value}->{min} or
6063     defined $element_state->{number_value}->{max}) {
6064     my $min_value = $element_state->{number_value}->{min};
6065     my $max_value = $element_state->{number_value}->{max};
6066     my $value_value = $element_state->{number_value}->{value};
6067    
6068     if (defined $min_value and defined $max_value) {
6069     if ($min_value > $max_value) {
6070     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6071     $self->{onerror}->(node => $max,
6072     type => 'max lt min', ## TODOC: type
6073     level => $self->{level}->{must});
6074     }
6075     }
6076    
6077     if (defined $min_value and defined $value_value) {
6078     if ($min_value > $value_value) {
6079     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6080     $self->{onerror}->(node => $value,
6081     type => 'value lt min', ## TODOC: type
6082     level => $self->{level}->{warn});
6083     ## NOTE: Not an error.
6084     }
6085     }
6086    
6087     if (defined $max_value and defined $value_value) {
6088     if ($max_value < $value_value) {
6089     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6090     $self->{onerror}->(node => $value,
6091     type => 'value gt max', ## TODOC: type
6092     level => $self->{level}->{warn});
6093     ## NOTE: Not an error.
6094     }
6095     }
6096     }
6097 wakaba 1.150
6098 wakaba 1.168 ## TODO: Warn unless value = min * x where x is an integer.
6099    
6100 wakaba 1.150 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6101     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6102     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6103     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6104     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6105 wakaba 1.140 },
6106 wakaba 1.66 check_start => sub {
6107     my ($self, $item, $element_state) = @_;
6108 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6109     $self->{onerror}->(node => $item->{node},
6110     type => 'multiple labelable fae',
6111     level => $self->{level}->{must});
6112     } else {
6113     $self->{flag}->{has_labelable} = 2;
6114     }
6115 wakaba 1.138
6116     $element_state->{id_type} = 'labelable';
6117 wakaba 1.66 },
6118 wakaba 1.52 };
6119    
6120 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
6121    
6122 wakaba 1.80 ## NOTE: "authors who are nesting repetition blocks should position such
6123     ## [repetition-block-related] buttons carefully to make clear which block a
6124 wakaba 1.150 ## button applies to." [WF2]: I have no idea how this can be tested.
6125 wakaba 1.80
6126 wakaba 1.52 $Element->{$HTML_NS}->{button} = {
         ## Checker definition for the HTML |button| element.  The entries
         ## of |%HTMLPhrasingContentChecker| are used as the base; |status|,
         ## |check_attrs|, and |check_start| are specified here.
6127 wakaba 1.119 %HTMLPhrasingContentChecker, ## ISSUE: -interactive?
6128     status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
         ## First argument: attribute name -> checker.  Second argument:
         ## attribute name -> feature status flags.
6129 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6130 wakaba 1.165 ## ISSUE: In HTML5, no "MUST NOT" for using |action|, |method|,
6131     ## |enctype|, |target|, and |novalidate| with non-|submit|-|type|
6132     ## |button| elements.
6133 wakaba 1.56 action => $HTMLURIAttrChecker,
6134 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6135 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6136 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
6137     'application/x-www-form-urlencoded' => 1,
6138     'multipart/form-data' => 1,
6139     'text/plain' => 1,
6140     }),
6141 wakaba 1.136 form => $HTMLFormAttrChecker,
6142 wakaba 1.56 method => $GetHTMLEnumeratedAttrChecker->({
6143     get => 1, post => 1, put => 1, delete => 1,
6144     }),
6145 wakaba 1.165 name => $FormControlNameAttrChecker,
6146 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
6147 wakaba 1.162 onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
6148     onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6149 wakaba 1.56 replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
6150     target => $HTMLTargetAttrChecker,
6151 wakaba 1.80 ## NOTE: According to Web Forms 2.0, |button| attribute has |template|
6152     ## attribute to support the |add| button type (as part of repetition
6153     ## template feature). It conflicts with the |template| global attribute
6154     ## introduced as part of the data template feature.
6155     ## NOTE: |template| attribute as defined in Web Forms 2.0 has no
6156     ## author requirement.
6157 wakaba 1.52 type => $GetHTMLEnumeratedAttrChecker->({
6158     button => 1, submit => 1, reset => 1,
6159     }),
6160 wakaba 1.162 value => sub {}, ## NOTE: No restriction.
6161 wakaba 1.52 }, {
6162     %HTMLAttrStatus,
6163     %HTMLM12NCommonAttrStatus,
6164 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
6165 wakaba 1.119 action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6166     autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6167 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6168     dataformatas => FEATURE_HTML4_REC_RESERVED,
6169     datasrc => FEATURE_HTML4_REC_RESERVED,
6170 wakaba 1.119 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6171     enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6172     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6173 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6174 wakaba 1.119 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6175     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6176 wakaba 1.162 novalidate => FEATURE_HTML5_DEFAULT,
6177 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6178     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6179 wakaba 1.126 onformchange => FEATURE_WF2_INFORMATIVE,
6180     onforminput => FEATURE_WF2_INFORMATIVE,
6181 wakaba 1.56 replace => FEATURE_WF2,
6182 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6183 wakaba 1.119 target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6184 wakaba 1.162 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO: dropped
6185 wakaba 1.119 type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6186     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6187 wakaba 1.52 }),
6188 wakaba 1.66 check_start => sub {
         ## Arguments: the checker object, the item being checked (its
         ## |node| is the element), and the per-element state hash.
6189     my ($self, $item, $element_state) = @_;
         ## If both the |has_label| and |has_labelable| flags are already
         ## set, this element is reported as an extra labelable element.
         ## Otherwise the |has_labelable| flag is set to 2 (the |label|
         ## checker itself only sets 0 or 1).
6190 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6191     $self->{onerror}->(node => $item->{node},
6192     type => 'multiple labelable fae',
6193     level => $self->{level}->{must});
6194     } else {
6195     $self->{flag}->{has_labelable} = 2;
6196     }
6197 wakaba 1.162
6198     ## ISSUE: "The value attribute must not be present unless the form
6199     ## [content] attribute is present.": Wrong?
6200 wakaba 1.139
         ## Register the kind of resource referenced by each URL-valued
         ## attribute.
6201 wakaba 1.66 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6202     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6203 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6204     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6205 wakaba 1.138
         ## The ID of this element is of type |labelable|, which is the
         ## type the |for| attribute of the |label| element refers to.
6206     $element_state->{id_type} = 'labelable';
6207 wakaba 1.66 },
6208 wakaba 1.52 };
6209    
6210     $Element->{$HTML_NS}->{label} = {
         ## Checker definition for the HTML |label| element.  The entries of
         ## |%HTMLPhrasingContentChecker| are used as the base;
         ## |check_start| and |check_end| maintain the |has_label| and
         ## |has_labelable| flags consulted by the form control checkers.
6211 wakaba 1.139 %HTMLPhrasingContentChecker,
6212 wakaba 1.119 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC
6213     | FEATURE_XHTML2_ED,
6214 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6215 wakaba 1.138 for => sub {
6216     my ($self, $attr) = @_;
6217    
6218     ## NOTE: MUST be an ID of a labelable element.
6219    
         ## The reference is only recorded here, as a triple of the
         ## expected ID type, the ID value, and the attribute node;
         ## presumably it is resolved after all IDs are collected (TODO
         ## confirm).
6220     push @{$self->{idref}}, ['labelable', $attr->value, $attr];
6221     },
6222 wakaba 1.136 form => $HTMLFormAttrChecker,
6223 wakaba 1.52 }, {
6224     %HTMLAttrStatus,
6225 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
6226 wakaba 1.176 accesskey => FEATURE_HTML5_FD | FEATURE_WF2 | FEATURE_M12N10_REC,
6227 wakaba 1.119 for => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6228     form => FEATURE_HTML5_DEFAULT,
6229 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6230 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6231     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6232     }),
6233 wakaba 1.139 check_start => sub {
6234     my ($self, $item, $element_state) = @_;
         ## Presumably makes |label| descendants disallowed until
         ## |check_end| (TODO confirm against |_add_minus_elements|).
6235     $self->_add_minus_elements ($element_state, {$HTML_NS => {label => 1}});
6236    
         ## Save the current flags so that |check_end| can restore them,
         ## then set the flags for the content of this element:
         ## |has_labelable| is 1 if the |for| attribute is specified, or
         ## 0 otherwise.
6237     $element_state->{has_label_original} = $self->{flag}->{has_label};
6238     $self->{flag}->{has_label} = 1;
6239     $element_state->{has_labelable_original} = $self->{flag}->{has_labelable};
6240 wakaba 1.155 $self->{flag}->{has_labelable}
6241     = $item->{node}->has_attribute_ns (undef, 'for') ? 1 : 0;
6242 wakaba 1.139
6243     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6244     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6245     },
6246     check_end => sub {
6247     my ($self, $item, $element_state) = @_;
6248     $self->_remove_minus_elements ($element_state);
6249    
         ## The saved |has_labelable| value is restored only if the flag
         ## is still 1; any other value is left as is.
6250     if ($self->{flag}->{has_labelable} == 1) { # has for="" but no labelable
6251     $self->{flag}->{has_labelable}
6252     = $element_state->{has_labelable_original};
6253     }
         ## The |has_label| flag is removed unless it was already set
         ## when this element started.
6254     delete $self->{flag}->{has_label}
6255     unless $element_state->{has_label_original};
6256     ## TODO: Warn if no labelable descendant? <input type=hidden>?
6257    
6258     ## NOTE: |<label for=a><input id=a></label>| is non-conforming.
6259    
         ## Finally run the |check_end| handler of the base checker.
6260     $HTMLPhrasingContentChecker{check_end}->(@_);
6261     },
6262 wakaba 1.52 ## TODO: Tests for <nest/> in <label>
6263     };
6264    
6265     $Element->{$HTML_NS}->{select} = {
         ## Checker definition for the HTML |select| element.  The entries
         ## of |%HTMLChecker| are used as the base; the content model
         ## handlers below accept |option| and |optgroup| children only.
6266 wakaba 1.121 %HTMLChecker,
6267 wakaba 1.163 ## ISSUE: HTML5 has no requirement like these:
6268 wakaba 1.52 ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
6269     ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
6270 wakaba 1.119 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6271 wakaba 1.56 is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
         ## First argument: attribute name -> checker.  Second argument:
         ## attribute name -> feature status flags.
6272 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6273 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6274 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6275 wakaba 1.56 data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
6276 wakaba 1.136 form => $HTMLFormAttrChecker,
6277 wakaba 1.52 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
6278 wakaba 1.165 name => $FormControlNameAttrChecker,
6279 wakaba 1.163 ## TODO: tests for on*
6280 wakaba 1.126 onformchange => $HTMLEventHandlerAttrChecker,
6281     onforminput => $HTMLEventHandlerAttrChecker,
6282     oninput => $HTMLEventHandlerAttrChecker,
6283 wakaba 1.56 oninvalid => $HTMLEventHandlerAttrChecker,
         ## The callback receives the parsed number; zero is rejected.
6284 wakaba 1.163 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6285 wakaba 1.52 }, {
6286     %HTMLAttrStatus,
6287     %HTMLM12NCommonAttrStatus,
6288 wakaba 1.176 accesskey => FEATURE_HTML5_FD | FEATURE_WF2,
6289 wakaba 1.119 autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6290 wakaba 1.56 data => FEATURE_WF2,
6291 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6292     dataformatas => FEATURE_HTML4_REC_RESERVED,
6293     datasrc => FEATURE_HTML4_REC_RESERVED,
6294 wakaba 1.119 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6295     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6296 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6297 wakaba 1.119 multiple => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6298     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6299 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6300     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6301 wakaba 1.126 onformchange => FEATURE_WF2_INFORMATIVE,
6302     onforminput => FEATURE_WF2_INFORMATIVE,
6303 wakaba 1.52 onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6304 wakaba 1.126 oninput => FEATURE_WF2,
6305 wakaba 1.56 oninvalid => FEATURE_WF2,
6306 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6307     sdapref => FEATURE_HTML20_RFC,
6308 wakaba 1.119 size => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6309 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6310     }),
6311 wakaba 1.66 check_start => sub {
6312     my ($self, $item, $element_state) = @_;
         ## If both the |has_label| and |has_labelable| flags are already
         ## set, this element is reported as an extra labelable element.
         ## Otherwise the |has_labelable| flag is set to 2.
6313 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6314     $self->{onerror}->(node => $item->{node},
6315     type => 'multiple labelable fae',
6316     level => $self->{level}->{must});
6317     } else {
6318     $self->{flag}->{has_labelable} = 2;
6319     }
6320 wakaba 1.66
6321     $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6322     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6323 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6324     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6325 wakaba 1.138
6326     $element_state->{id_type} = 'labelable';
6327 wakaba 1.66 },
6328 wakaba 1.121 check_child_element => sub {
6329 wakaba 1.163 ## NOTE: (option | optgroup)*
6330    
6331 wakaba 1.121 my ($self, $item, $child_el, $child_nsuri, $child_ln,
6332     $child_is_transparent, $element_state) = @_;
         ## Order of the tests: an element excluded by |minus_elements|
         ## (and being interactive content) is an error; an element
         ## included by |plus_elements| is accepted; then only HTML
         ## |option| and |optgroup| elements are accepted.
6333 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
6334     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
6335 wakaba 1.121 $self->{onerror}->(node => $child_el,
6336     type => 'element not allowed:minus',
6337     level => $self->{level}->{must});
6338     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
6339     #
6340     } elsif ($child_nsuri eq $HTML_NS and
6341     {
6342     option => 1, optgroup => 1,
6343     }->{$child_ln}) {
6344     #
6345     } else {
6346     $self->{onerror}->(node => $child_el, type => 'element not allowed',
6347     level => $self->{level}->{must});
6348     }
6349     },
6350     check_child_text => sub {
6351     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
         ## Text flagged as significant by the caller is not allowed.
6352     if ($has_significant) {
6353     $self->{onerror}->(node => $child_node, type => 'character not allowed',
6354     level => $self->{level}->{must});
6355     }
6356     },
6357 wakaba 1.52 };
6358 wakaba 1.1
6359 wakaba 1.52 $Element->{$HTML_NS}->{datalist} = {
         ## Checker definition for the HTML |datalist| element.  The content
         ## is either phrasing content or a list of |option| elements; which
         ## of the two applies is decided by the first child that is seen
         ## and is tracked in |$element_state->{phase}|.
6360 wakaba 1.121 %HTMLPhrasingContentChecker,
6361 wakaba 1.119 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6362 wakaba 1.56 check_attrs => $GetHTMLAttrsChecker->({
6363     data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
6364     }, {
6365 wakaba 1.52 %HTMLAttrStatus,
6366 wakaba 1.56 data => FEATURE_WF2,
6367 wakaba 1.52 }),
6368 wakaba 1.66 check_start => sub {
6369     my ($self, $item, $element_state) = @_;
6370    
         ## Initial phase; it is changed to |phrasing| or |option| by
         ## the child handlers below.
6371 wakaba 1.121 $element_state->{phase} = 'any'; # any | phrasing | option
6372    
6373 wakaba 1.66 $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6374 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6375     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6376 wakaba 1.158
         ## The ID of this element is of type |datalist|.
6377     $element_state->{id_type} = 'datalist';
6378 wakaba 1.66 },
6379 wakaba 1.121 ## NOTE: phrasing | option*
6380     check_child_element => sub {
6381     my ($self, $item, $child_el, $child_nsuri, $child_ln,
6382     $child_is_transparent, $element_state) = @_;
         ## |minus_elements| and |plus_elements| are tested first; the
         ## remaining branches depend on the current phase.
6383 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
6384     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
6385 wakaba 1.121 $self->{onerror}->(node => $child_el,
6386     type => 'element not allowed:minus',
6387     level => $self->{level}->{must});
6388     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
6389     #
6390     } elsif ($element_state->{phase} eq 'phrasing') {
         ## Phrasing phase: only phrasing content is allowed.
6391     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
6392     #
6393     } else {
6394     $self->{onerror}->(node => $child_el,
6395     type => 'element not allowed:phrasing',
6396     level => $self->{level}->{must});
6397     }
6398     } elsif ($element_state->{phase} eq 'option') {
         ## Option phase: only HTML |option| elements are allowed.
6399     if ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
6400     #
6401     } else {
6402     $self->{onerror}->(node => $child_el,
6403     type => 'element not allowed',
6404     level => $self->{level}->{must});
6405     }
6406     } elsif ($element_state->{phase} eq 'any') {
         ## Undecided: the first acceptable child selects the phase.
         ## An unacceptable child is an error and leaves the phase
         ## unchanged.
6407     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
6408     $element_state->{phase} = 'phrasing';
6409     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
6410     $element_state->{phase} = 'option';
6411     } else {
6412     $self->{onerror}->(node => $child_el,
6413     type => 'element not allowed',
6414     level => $self->{level}->{must});
6415     }
6416     } else {
6417     die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
6418     }
6419     },
6420     check_child_text => sub {
6421     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
         ## Significant text selects (or requires) the phrasing phase;
         ## in the option phase it is an error.
6422     if ($has_significant) {
6423     if ($element_state->{phase} eq 'phrasing') {
6424     #
6425     } elsif ($element_state->{phase} eq 'any') {
6426     $element_state->{phase} = 'phrasing';
6427     } else {
6428     $self->{onerror}->(node => $child_node,
6429     type => 'character not allowed',
6430     level => $self->{level}->{must});
6431     }
6432     }
6433     },
6434     check_end => sub {
6435     my ($self, $item, $element_state) = @_;
6436     if ($element_state->{phase} eq 'phrasing') {
         ## Phrasing phase: propagate the |has_significant| flag to
         ## the parent state, or report the lack of significant
         ## content unless the item is transparent.
6437     if ($element_state->{has_significant}) {
6438     $item->{real_parent_state}->{has_significant} = 1;
6439     } elsif ($item->{transparent}) {
6440     #
6441     } else {
6442     $self->{onerror}->(node => $item->{node},
6443     type => 'no significant content',
6444     level => $self->{level}->{should});
6445     }
6446     } else {
6447     ## NOTE: Since the content model explicitly allows a |datalist| element
6448     ## being empty, we don't raise "no significant content" error for this
6449     ## element when there is no element. (We should raise an error for
6450     ## |<datalist><br></datalist>|, however.)
6451     ## NOTE: As a side-effect, when the |datalist| element only contains
6452     ## non-conforming content, then the |phase| flag has not changed from
6453     ## |any|, no "no significant content" error is raised neither.
6454     $HTMLChecker{check_end}->(@_);
6455     }
6456     },
6457 wakaba 1.52 };
6458 wakaba 1.49
6459 wakaba 1.52 $Element->{$HTML_NS}->{optgroup} = {
         ## Checker definition for the HTML |optgroup| element.  The entries
         ## of |%HTMLChecker| are used as the base; the |label| attribute is
         ## required and only |option| children are accepted.
6460 wakaba 1.121 %HTMLChecker,
6461 wakaba 1.119 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6462 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6463     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
         ## Any value is accepted; the presence of the attribute is
         ## checked in |check_attrs2|.
6464 wakaba 1.164 label => sub {},
6465 wakaba 1.52 }, {
6466     %HTMLAttrStatus,
6467     %HTMLM12NCommonAttrStatus,
6468 wakaba 1.119 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6469     label => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6470 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6471 wakaba 1.52 }),
6472 wakaba 1.164 check_attrs2 => sub {
6473     my ($self, $item, $element_state) = @_;
6474    
         ## The |label| attribute has to be specified.
6475     unless ($item->{node}->has_attribute_ns (undef, 'label')) {
6476     $self->{onerror}->(node => $item->{node},
6477     type => 'attribute missing',
6478     text => 'label',
6479     level => $self->{level}->{must});
6480     }
6481     },
6482 wakaba 1.121 check_child_element => sub {
6483     my ($self, $item, $child_el, $child_nsuri, $child_ln,
6484     $child_is_transparent, $element_state) = @_;
         ## Order of the tests: an element excluded by |minus_elements|
         ## (and being interactive content) is an error; an element
         ## included by |plus_elements| is accepted; then only HTML
         ## |option| elements are accepted.
6485 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
6486     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
6487 wakaba 1.121 $self->{onerror}->(node => $child_el,
6488     type => 'element not allowed:minus',
6489     level => $self->{level}->{must});
6490     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
6491     #
6492     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
6493     #
6494     } else {
6495     $self->{onerror}->(node => $child_el, type => 'element not allowed',
6496     level => $self->{level}->{must});
6497     }
6498     },
6499     check_child_text => sub {
6500     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
         ## Text flagged as significant by the caller is not allowed.
6501     if ($has_significant) {
6502     $self->{onerror}->(node => $child_node, type => 'character not allowed',
6503     level => $self->{level}->{must});
6504     }
6505     },
6506 wakaba 1.52 };
6507    
6508     $Element->{$HTML_NS}->{option} = {
         ## Checker definition for the HTML |option| element.  The entries
         ## of |%HTMLTextChecker| are used as the base; only |status| and
         ## |check_attrs| are specified here.
6509     %HTMLTextChecker,
6510 wakaba 1.119 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
         ## First argument: attribute name -> checker.  Second argument:
         ## attribute name -> feature status flags.
6511 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6512     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6513 wakaba 1.164 label => sub {}, ## NOTE: No restriction.
6514     selected => $GetHTMLBooleanAttrChecker->('selected'), ## ISSUE: Not a "boolean attribute"
6515     value => sub {}, ## NOTE: No restriction.
6516 wakaba 1.52 }, {
6517     %HTMLAttrStatus,
6518     %HTMLM12NCommonAttrStatus,
6519 wakaba 1.119 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6520     label => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6521 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6522 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6523     sdapref => FEATURE_HTML20_RFC,
6524 wakaba 1.119 selected => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6525     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6526 wakaba 1.52 }),
6527     };
6528 wakaba 1.49
6529 wakaba 1.52 $Element->{$HTML_NS}->{textarea} = {
         ## Checker definition for the HTML |textarea| element.  The entries
         ## of |%HTMLTextChecker| are used as the base; |check_attrs|
         ## validates individual attributes, |check_start| maintains the
         ## labelable flags, and |check_attrs2| checks the requirements
         ## that involve more than one attribute.
6530     %HTMLTextChecker,
6531 wakaba 1.121 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
         ## First argument: attribute name -> checker.  Second argument:
         ## attribute name -> feature status flags.
6532 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6533 wakaba 1.164 accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
6534 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6535 wakaba 1.164 cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6536 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6537 wakaba 1.136 form => $HTMLFormAttrChecker,
6538 wakaba 1.56 ## TODO: inputmode [WF2]
6539 wakaba 1.164 maxlength => sub {
         ## Checks the syntax of the attribute value and then compares
         ## the length of the text content of the element with the
         ## parsed maximum.
6540     my ($self, $attr, $item, $element_state) = @_;
6541    
         ## Syntax check; the callback accepts any parsed number.
6542     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
6543    
6544 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
6545 wakaba 1.164 ## NOTE: Applying the rules for parsing non-negative integers
6546     ## results in a number.
6547     my $max_allowed_value_length = 0+$1;
6548    
6549     ## ISSUE: "The the purposes of this requirement," (typo)
6550    
6551     ## ISSUE: This constraint is applied w/o CRLF normalization to
6552     ## |value| attribute, but w/ CRLF normalization to
6553     ## concept-value.
6554     my $value = $item->{node}->text_content;
6555     if (defined $value) {
6556     my $codepoint_length = length $value;
6557    
         ## The error is reported on the element, not on the
         ## attribute.
6558     if ($codepoint_length > $max_allowed_value_length) {
6559     $self->{onerror}->(node => $item->{node},
6560     type => 'value too long',
6561     level => $self->{level}->{must});
6562     }
6563     }
6564     }
6565     },
6566 wakaba 1.165 name => $FormControlNameAttrChecker,
6567 wakaba 1.164 onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
6568     onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
         ## NOTE: |oninput| used to be listed a second time after |rows|
         ## with the same checker; the redundant entry has been removed.
6569     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6570 wakaba 1.161 pattern => $PatternAttrChecker,
6571 wakaba 1.52 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6572 wakaba 1.56 required => $GetHTMLBooleanAttrChecker->('required'),
6573 wakaba 1.164 rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6575     oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
6576 wakaba 1.161 ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
6577 wakaba 1.56 wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
6578 wakaba 1.52 }, {
6579     %HTMLAttrStatus,
6580     %HTMLM12NCommonAttrStatus,
6581 wakaba 1.164 accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6582 wakaba 1.61 'accept-charset' => FEATURE_HTML2X_RFC,
6583 wakaba 1.176 accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
6584 wakaba 1.121 autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6585     cols => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6586 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6587 wakaba 1.49 dataformatas => FEATURE_HTML4_REC_RESERVED,
6588     datasrc => FEATURE_HTML4_REC_RESERVED,
6589 wakaba 1.121 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6590     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6591 wakaba 1.164 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
6592 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6593 wakaba 1.121 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6594     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6595 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6596     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6597     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6598 wakaba 1.164 onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6599     onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6600     oninput => FEATURE_WF2, ## TODO: tests
6601     oninvalid => FEATURE_WF2, ## TODO: tests
6602 wakaba 1.52 onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6603 wakaba 1.161 pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6604 wakaba 1.121 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6605     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6606     rows => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6607 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6608     sdapref => FEATURE_HTML20_RFC,
6609 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6610 wakaba 1.121 wrap => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6611 wakaba 1.52 }),
6612 wakaba 1.66 check_start => sub {
6613     my ($self, $item, $element_state) = @_;
         ## If both the |has_label| and |has_labelable| flags are already
         ## set, this element is reported as an extra labelable element.
         ## Otherwise the |has_labelable| flag is set to 2.
6614 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6615     $self->{onerror}->(node => $item->{node},
6616     type => 'multiple labelable fae',
6617     level => $self->{level}->{must});
6618     } else {
6619     $self->{flag}->{has_labelable} = 2;
6620     }
6621 wakaba 1.164
6622     $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6623     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6624     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6625    
6626     $element_state->{id_type} = 'labelable';
6627     },
6628     check_attrs2 => sub {
6629     my ($self, $item, $element_state) = @_;
6630 wakaba 1.66
         ## |pattern| without |title| is reported at the |should| level.
6631 wakaba 1.161 if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6632     not $item->{node}->has_attribute_ns (undef, 'title')) {
6633     ## NOTE: WF2 (dropped by HTML5)
6634     $self->{onerror}->(node => $item->{node},
6635     type => 'attribute missing',
6636     text => 'title',
6637     level => $self->{level}->{should});
6638     }
6639    
         ## |wrap=hard| (compared ASCII case-insensitively) requires the
         ## |cols| attribute.
6640 wakaba 1.164 unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
6641     my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
6642     if (defined $wrap) {
6643     $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
6644     if ($wrap eq 'hard') {
6645     $self->{onerror}->(node => $item->{node},
6646     type => 'attribute missing',
6647     text => 'cols',
6648     level => $self->{level}->{must});
6649     }
6650     }
6651     }
6652 wakaba 1.66 },
6653 wakaba 1.52 };
6654 wakaba 1.49
## |output| element definition, built on |%HTMLPhrasingContentChecker|.
$Element->{$HTML_NS}->{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    for => sub {
      my ($self, $attr) = @_;

      ## NOTE: "Unordered set of unique space-separated tokens".
      ## The first occurrence of each token is queued in |$self->{idref}|;
      ## any later occurrence is reported as a duplicate.

      my %seen;
      TOKEN: for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
        ## |split| can yield a leading empty field; ignore it.
        next TOKEN unless length $token;

        if ($seen{$token}) {
          $self->{onerror}->(node => $attr, type => 'duplicate token',
                             value => $token,
                             level => $self->{level}->{must});
          next TOKEN;
        }

        $seen{$token} = 1;
        push @{$self->{idref}}, ['any', $token, $attr];
      }
    },
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
  }, {
    %HTMLAttrStatus,
    for => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  }),
};
6691    
## |isindex| element definition (empty element, deprecated in HTML4).
$Element->{$HTML_NS}->{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->({
    prompt => sub { }, ## NOTE: Text [M12N]
  }, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record how URI-valued attributes of this element are used.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
6719 wakaba 1.49
## |script| element definition.
##
## With a |src| attribute the element must be empty.  Otherwise the
## script type is derived from |type| / |language|, the text content is
## accumulated in |$element_state->{text}| and, at the end of the
## element, handed to |$self->{onsubdoc}| (unless the type looks like an
## XML media type, in which case an "uncertain" level error is raised).
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    charset => sub {
      my ($self, $attr) = @_;

      ## |charset| is only meaningful together with |src|.
      unless ($attr->owner_element->has_attribute_ns (undef, 'src')) {
        $self->{onerror}->(type => 'attribute not allowed',
                           node => $attr,
                           level => $self->{level}->{must});
      }

      $HTMLCharsetChecker->($attr->value, @_);
    },
    language => sub {}, ## NOTE: No syntax constraint according to HTML4.
    src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
  }, {
    %HTMLAttrStatus,
    async => FEATURE_HTML5_WD,
    charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    href => FEATURE_RDFA_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    if ($item->{node}->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $item->{node}->get_attribute_ns (undef, 'type');
      my $language = $item->{node}->get_attribute_ns (undef, 'language');

      ## Determine the script type.  The |type| attribute, when
      ## present, always takes precedence; an empty |language| is only
      ## significant when there is no |type| attribute at all.
      if (defined $type) {
        $type = 'text/javascript' if $type eq '';
      } elsif (defined $language and $language ne '') {
        $type = 'text/' . $language;
      } else {
        ## No |type|, and |language| is either absent or empty.
        $type = 'text/javascript';
      }

      ## Reduce a syntactically valid media type to lowercase
      ## "type/subtype"; anything else is kept as is.
      if ($type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
        $type = "$1/$2";
        $type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
        ## TODO: Though we strip parameter here, it should not be ignored for the purpose of conformance checking...
      }
      $element_state->{script_type} = $type;
    }

    $element_state->{uri_info}->{src}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;

    ## Buffer for the script text collected by |check_child_text|.
    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      if ($element_state->{must_be_empty}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:empty',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant and
        $element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed:empty',
                         level => $self->{level}->{must});
    }
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{must_be_empty}) {
      if ($element_state->{script_type} =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML script lang',
                           text => $element_state->{script_type},
                           level => $self->{level}->{uncertain});
        ## ISSUE: Should we raise some kind of error for
        ## <script type="text/xml">aaaaa</script>?
        ## NOTE: ^^^ This is why we throw an "uncertain" error.
      } else {
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $element_state->{script_type},
                             is_char_string => 1});
      }

      $HTMLChecker{check_end}->(@_);
    }
  },
  ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
  ## NOTE: "When used to include script data, the script data must be embedded
  ## inline, the format of the data must be given using the type attribute,
  ## and the src attribute must not be specified." - not testable.
  ## TODO: It would be possible to err <script type=text/plain src=...>
};
6844 wakaba 1.25 ## ISSUE: Significant check and text child node
6845 wakaba 1.1
6846     ## NOTE: When script is disabled.
## |noscript| element definition.  Behaviour depends on whether the
## element occurs while |$self->{flag}->{in_head}| is set.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};

    ## |noscript| is reported as an error outside of HTML documents.
    if (not $el->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $el, type => 'in XML:noscript',
                         level => $self->{level}->{must});
    }

    ## Outside |head|, nested |noscript| elements are excluded.
    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {noscript => 1}})
        unless $self->{flag}->{in_head};

    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Not in |head|: behave as an ordinary transparent element.
    return $HTMLTransparentChecker{check_child_element}->(@_)
        unless $self->{flag}->{in_head};

    ## Shared reporter for children that are not allowed in a
    ## |noscript| element in |head|.
    my $reject = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:head noscript',
                         level => $self->{level}->{must});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## |style| is fine unless it carries |scoped|.
      $reject->() if $child_el->has_attribute_ns (undef, 'scoped');
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
      ## |meta| needs an |http-equiv| attribute whose value is not
      ## "content-type".
      my $http_equiv_attr
          = $child_el->get_attribute_node_ns (undef, 'http-equiv');
      ## TODO: case
      $reject->()
          if not $http_equiv_attr
              or lc ($http_equiv_attr->value) eq 'content-type';
    } else {
      $reject->();
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    return $HTMLTransparentChecker{check_child_text}->(@_)
        unless $self->{flag}->{in_head};

    ## In |head|, significant text is reported as an error.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $base = $self->{flag}->{in_head}
        ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
    $base->{check_end}->(@_);
  },
};
6938 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
6939 wakaba 1.1
## |event-source| element definition (empty element; dropped status).
$Element->{$HTML_NS}->{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->(
    {src => $HTMLURIAttrChecker},
    {%HTMLAttrStatus, src => FEATURE_HTML5_LC_DROPPED},
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## All of these URI attributes are recorded as resource references.
    for my $attr_name (qw/src template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
6957    
## |eventsource| element definition (empty element).
$Element->{$HTML_NS}->{eventsource} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {src => $HTMLURIAttrChecker},
    {%HTMLAttrStatus, src => FEATURE_HTML5_WD},
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## All of these URI attributes are recorded as resource references.
    for my $attr_name (qw/src template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
6975    
## |details| element definition: a copy of the |fieldset| definition
## with its own status and attribute checker.
$Element->{$HTML_NS}->{details} = {
  %{$Element->{$HTML_NS}->{fieldset}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {open => $GetHTMLBooleanAttrChecker->('open')},
    {%HTMLAttrStatus, open => FEATURE_HTML5_LC},
  ),
};
6986    
## |datagrid| element definition, built on |%HTMLFlowContentChecker|.
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |a| and nested |datagrid| are excluded until |check_end|.
    my $excluded = {$HTML_NS => {a => 1, datagrid => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusions added in |check_start|, then run the
    ## inherited end-of-element check.
    $self->_remove_minus_elements ($element_state);
    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
7014    
## |command| element definition (empty element).
$Element->{$HTML_NS}->{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    type => sub {
      my ($self, $attr) = @_;
      ## |type| is an enumerated attribute; its keywords are matched
      ## ASCII case-insensitively, so normalize before the lookup.
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                           level => $self->{level}->{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_DROPPED, # HTML5 revision 3067
    disabled => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |icon| is recorded as an embedded resource; the others as
    ## plain resource references.
    $element_state->{uri_info}->{icon}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
7051    
## |bb| element definition, built on |%HTMLPhrasingContentChecker|.
$Element->{$HTML_NS}->{bb} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1})},
    {%HTMLAttrStatus, type => FEATURE_HTML5_WD},
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Interactive content is excluded until |check_end|.
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## NOTE(review): The end check is delegated to the transparent
    ## checker although the definition is based on the phrasing content
    ## checker - confirm that this is intended.
    $HTMLTransparentChecker{check_end}->(@_);
  },
};
7075    
## |menu| element definition.
##
## The content is either a sequence of |li| elements or phrasing
## content, never a mixture.  |$element_state->{phase}| starts as
## 'li or phrasing' and is fixed to 'li' or 'phrasing' by the first
## |li| child or by the first phrasing child / significant text.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## ISSUE: <menu id=""><p contextmenu=""> match?  (In the current
    ## implementation, it does not match.)
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_DROPPED,
    ## The key has to be |compact| to match the attribute checked above.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';

    ## Remember the outer value so that |check_end| can restore it.
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    $element_state->{id_type} = 'menu';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        ## |li| after phrasing content.
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        ## Phrasing content after |li|.
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
7168    
## |datatemplate| element definition: only |rule| children (plus any
## currently "plus" elements) are accepted, and no significant text.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $is_excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));

    if ($is_excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'rule') {
      #
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
  is_xml_root => 1,
};
7199    
## |rule| element definition.  Children and text are not checked; see
## the note at the end of the definition.
$Element->{$HTML_NS}->{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    condition => $HTMLSelectorsAttrChecker,
    mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
  }, {
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |nest| is additionally allowed until |check_end|.
    $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});

    ## Remember the outer value so that |check_end| can restore it.
    $element_state->{in_rule_original} = $self->{flag}->{in_rule};
    $self->{flag}->{in_rule} = 1;

    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_plus_elements ($element_state);
    unless ($element_state->{in_rule_original}) {
      delete $self->{flag}->{in_rule};
    }

    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
7235    
## |nest| element definition (empty element).
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    filter => $HTMLSelectorsAttrChecker,
    mode => sub {
      my ($self, $attr) = @_;

      ## The value has to be non-empty and free of space characters.
      unless ($attr->value =~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/) {
        $self->{onerror}->(node => $attr, type => 'mode:syntax error',
                           level => $self->{level}->{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
};
7255    
## |legend| element definition.  Child checks follow the flow content
## checker when the parent's state has |in_figure| set, and the
## phrasing content checker otherwise.
$Element->{$HTML_NS}->{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
#    align => $GetHTMLEnumeratedAttrChecker->({
#      top => 1, bottom => 1, left => 1, right => 1,
#    }),
    form => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
    form => FEATURE_HTML5_DROPPED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $base = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker : \%HTMLPhrasingContentChecker;
    $base->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    my $base = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker : \%HTMLPhrasingContentChecker;
    $base->{check_child_text}->(@_);
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |figure| is excluded until |check_end|.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {figure => 1}});

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # legend
7302 wakaba 1.1
## |div| element definition, built on |%HTMLFlowContentChecker|.
$Element->{$HTML_NS}->{div} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## All of these URI attributes are recorded as resource references.
    for my $attr_name (qw/datasrc template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
7327    
## |center| element definition (deprecated status), built on
## |%HTMLFlowContentChecker|.  No element-specific attribute checkers.
$Element->{$HTML_NS}->{center} = {
  %HTMLFlowContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
7337    
## |font| element definition, built on |%HTMLTransparentChecker|.
$Element->{$HTML_NS}->{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |size|, |color|, |face|
    },
    {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      color => FEATURE_M12N10_REC_DEPRECATED,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      face => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    },
  ),
  ## NOTE: When the |font| element was defined in the HTML5 specification,
  ## it was allowed only in a document with the WYSIWYG signature.  The
  ## checker does not check whether the signature is present, since the
  ## signature has been dropped, too, and has never been implemented.  (In
  ## addition, for any |font| element an "element not defined" error is
  ## raised anyway, such that we don't have to raise an additional error.)
};
7362 wakaba 1.49
## Checker definition for the |basefont| element in the HTML namespace.
##
## The definition starts from the entries of %HTMLEmptyChecker and then
## sets |status| and |check_attrs|; keys listed after a spread hash replace
## same-named keys coming from it.
##
## |check_attrs| is built by $GetHTMLAttrsChecker from two hashes:
##   1. an empty hash - checkers for |color|, |face| and |size| are not
##      implemented yet (see the TODO below);
##   2. a table of attribute names to FEATURE_* flags (presumably the
##      per-attribute feature status - confirm against
##      $GetHTMLAttrsChecker), made of %HTMLAttrStatus followed by explicit
##      entries for |color|, |face|, |id| and |size|.
7363 wakaba 1.64 $Element->{$HTML_NS}->{basefont} = {
7364     %HTMLEmptyChecker,
7365     status => FEATURE_M12N10_REC_DEPRECATED,
7366     check_attrs => $GetHTMLAttrsChecker->({
7367     ## TODO: color, face, size
7368     }, {
7369     %HTMLAttrStatus,
7370     color => FEATURE_M12N10_REC_DEPRECATED,
7371     face => FEATURE_M12N10_REC_DEPRECATED,
7372 wakaba 1.153 #id => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
7373     id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
7374 wakaba 1.64 size => FEATURE_M12N10_REC_DEPRECATED,
7375     }),
7376     };
7377    
7378 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
7379     ## class title id cols rows onload onunload style(x10)
7380     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7381     ## noframes Common, lang(xhtml10)
7382    
7383 wakaba 1.100 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7384 wakaba 1.56
7385 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7386     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7387     ## xmp, listing sdapref [HTML 2.0]
7388    
7389 wakaba 1.56 =pod
7390    
7391 wakaba 1.61 HTML 2.0 nextid @n
7392    
7393     RFC 2659: CERTS CRYPTOPTS
7394    
7395     ISO-HTML: pre-html, divN
7396 wakaba 1.82
7397     XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7398     di (Common), nl (Common), handler (Common, type), standby (Common),
7399     summary (Common)
7400    
7401 wakaba 1.97 Access & XHTML2: access (LC)
7402 wakaba 1.82
7403     XML Events & XForms (for XHTML2 support; very, very low priority)
7404 wakaba 1.61
7405 wakaba 1.56 =cut
7406 wakaba 1.61
7407     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7408     ## We added them only to |a|. |link| and |form| might also allow them
7409     ## in theory.
7410 wakaba 1.1
## Set the |loaded| flag for the HTML namespace in the
## $Whatpm::ContentChecker::Namespace table (presumably consulted by
## Whatpm::ContentChecker to avoid loading this module again - confirm
## against the code that reads the flag).
7411     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
7412    
## A module file must end by evaluating to a true value, otherwise
## |require| reports a failure.
7413     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24