/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.187 - (hide annotations) (download)
Sun Jul 26 14:10:59 2009 UTC (15 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.186: +272 -269 lines
++ whatpm/Whatpm/ContentChecker/ChangeLog	26 Jul 2009 14:10:47 -0000
	* HTML.pm: Updated element and attribute status.

2009-07-26  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.117 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6    
7 wakaba 1.1 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8    
9 wakaba 1.174 ## --- Feature Status ---
10    
## HTML5 status constants.  Each one is a bit combination of a
## |Whatpm::ContentChecker::FEATURE_STATUS_*| maturity value and, for
## features that are still part of HTML5, the |FEATURE_ALLOWED| flag.

## HTML5 "implemented" status.
##
## NOTE: Strictly speaking, HTML5's "implemented and widely deployed"
## status does not necessarily satisfy the condition for
## FEATURE_STATUS_REC, since there are no test cases for most of the
## features marked as "implemented" in HTML5.  Nevertheless, this
## status is special-cased as if it had passed the CR phase,
## considering HTML's history.
sub FEATURE_HTML5_REC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_REC |
   Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## HTML5 "awaiting implementation feedback" status.
sub FEATURE_HTML5_CR () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR |
   Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## HTML5 "last call for comments" status.
sub FEATURE_HTML5_LC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_LC |
   Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## Part of HTML5, but in the "being considered for removal" status.
sub FEATURE_HTML5_AT_RISK () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD |
   Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## HTML5 "working draft" status.
sub FEATURE_HTML5_WD () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD |
   Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## HTML5 "first draft" status.
sub FEATURE_HTML5_FD () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD |
   Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## Part of HTML5, but not annotated with any status.
sub FEATURE_HTML5_DEFAULT () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD |
   Whatpm::ContentChecker::FEATURE_ALLOWED)
}

## Was part of HTML5, in a status before the last call for comments,
## but then dropped.  (No FEATURE_ALLOWED flag.)
sub FEATURE_HTML5_DROPPED () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD)
}

## Was part of HTML5, in the last call for comments status, but then
## dropped.  (No FEATURE_ALLOWED flag.)
sub FEATURE_HTML5_LC_DROPPED () {
  (Whatpm::ContentChecker::FEATURE_STATUS_LC)
}
65 wakaba 1.154
## Defined in WF2 (whether deprecated or not) and then incorporated
## into the HTML5 spec.
sub FEATURE_WF2X () {
  (Whatpm::ContentChecker::FEATURE_STATUS_LC)
}

## Features introduced or modified in WF2, which were not merged into
## HTML5.
sub FEATURE_WF2 () {
  (Whatpm::ContentChecker::FEATURE_STATUS_LC)
}

## Features mentioned in WF2's informative appendix A, which were not
## merged into HTML5.
sub FEATURE_WF2_INFORMATIVE () {
  (Whatpm::ContentChecker::FEATURE_STATUS_LC)
}

## RDFa, Recommendation.
sub FEATURE_RDFA_REC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_REC)
}

## A feature that was defined in an RDFa last call working draft, but
## then dropped.
sub FEATURE_RDFA_LC_DROPPED () {
  (Whatpm::ContentChecker::FEATURE_STATUS_LC)
}

## NOTE: The XHTML Role LCWD has almost no information on how the
## |role| attribute can be used - the only requirement for that matter
## is: "the attribute MUST be referenced using its namespace-qualified
## form" (and this is a host language conformance requirement!).
sub FEATURE_ROLE_LC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_LC)
}

## XHTML 2.0 Editor's Draft, in which the namespace URI is
## "http://www.w3.org/1999/xhtml".
sub FEATURE_XHTML2_ED () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD)
}

## XHTML Basic 1.1 Recommendation, new features (not in XHTML M12N).
## NOTE: The constant name still says "CR" although the value is REC.
sub FEATURE_XHTMLBASIC11_CR () {
  (Whatpm::ContentChecker::FEATURE_STATUS_REC)
}

## XHTML Basic 1.1 Recommendation, new but deprecated features.
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  (Whatpm::ContentChecker::FEATURE_STATUS_REC |
   Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO)
}

## Ruby Annotation.
## NOTE: The value is FEATURE_STATUS_CR although the name says "REC".
sub FEATURE_RUBY_REC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR)
}

## XHTML M12N 1.1 Recommendation, new features (not in 1.0).
## NOTE: The constant name still says "LC" although the value is REC.
sub FEATURE_M12N11_LC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_REC)
}
125    
## NOTE: The M12N10 status is based on its abstract module definition.
## It contains a number of problems.  (However, again, it's a REC!)

## XHTML M12N 1.0 Recommendation.
## NOTE: XHTML M12N 1.0 did pass the CR phase.
sub FEATURE_M12N10_REC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_REC)
}

## XHTML M12N 1.0 Recommendation, deprecated features.
sub FEATURE_M12N10_REC_DEPRECATED () {
  (Whatpm::ContentChecker::FEATURE_STATUS_REC |
   Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO)
}

## NOTE: The XHTML10 status is based on its transitional and frameset
## DTDs (second edition).  Only attributes missing from the M12N10
## abstract definition are added.
sub FEATURE_XHTML10_REC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR)
}

## ISO-HTML preparation (informative documentation).
## NOTE: Diff from HTML4.
sub FEATURE_ISOHTML_PREPARATION () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR)
}

## NOTE: The HTML4 status is based on its transitional and frameset
## DTDs (HTML 4.01).  Only attributes missing from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  (Whatpm::ContentChecker::FEATURE_STATUS_WD)
}

## TODO: According to the HTML4 definition, authors SHOULD use style
## sheets rather than presentational attributes (deprecated or not
## deprecated).

## HTML 3.2 Recommendation, obsolete features.
## NOTE: Diff from HTML4.
## NOTE: The deprecation comes from a lowercase normative "should".
sub FEATURE_HTML32_REC_OBSOLETE () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR |
   Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD)
}

## RFC 2659 (Experimental RFC).
sub FEATURE_RFC2659 () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR)
}

## HTML 2.x (Proposed Standard, obsolete).
## NOTE: Diff from HTML 2.0 and not in newer versions.
sub FEATURE_HTML2X_RFC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR)
}

## RFC 1942 (Experimental RFC, obsolete).
## NOTE: Diff from HTML 2.0.
sub FEATURE_RFC1942 () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR)
}

## HTML 2.0 (Proposed Standard, obsolete).
## NOTE: Diff from HTML 3.2.
sub FEATURE_HTML20_RFC () {
  (Whatpm::ContentChecker::FEATURE_STATUS_CR)
}
183 wakaba 1.58
184 wakaba 1.174 ## --- Content Model ---
185    
186 wakaba 1.29 ## December 2007 HTML5 Classification
187    
## Metadata content: namespace URI => {local name => 1}.
my $HTMLMetadataContent = {
  ## NOTE: A |meta| with no |name| element is not allowed as
  ## a metadata content other than |head| element.
  $HTML_NS => +{map { ($_ => 1) } qw(
    title base link style script noscript
    event-source eventsource
    command datatemplate
    meta
  )},
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
201    
## Flow content: namespace URI => {local name => 1}.
my $HTMLFlowContent = {
  $HTML_NS => +{map { ($_ => 1) } (
    qw(section nav article blockquote aside
       h1 h2 h3 h4 h5 h6 header
       footer address p hr dialog pre
       ol ul dl figure map table
       form fieldset),
    ## ISSUE: |details| and |datagrid| are "Flow element" in spec.
    qw(details datagrid),
    qw(datatemplate),
    ## ISSUE: |div| has no category in spec.
    qw(div),
    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    qw(style),

    qw(br q cite em strong small mark
       dfn abbr time progress meter code
       var samp kbd sub sup span i
       b bdo ruby
       script noscript event-source eventsource
       command bb
       input button label select datalist
       textarea output),
    ## ISSUE: |datagrid| is "Interactive element" in the spec.
    qw(datagrid),
    ## NOTE: |area| is allowed only as a descendant of |map|.
    qw(area),

    ## NOTE: Transparent.
    qw(a ins del font),

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
    qw(menu),

    qw(img iframe embed object video audio
       canvas),
  )},

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
244    
## Sectioning content: namespace URI => {local name => 1}.
my $HTMLSectioningContent = {
  $HTML_NS => +{map { ($_ => 1) } (
    qw(section nav article aside),
    ## NOTE: |body| is only allowed in |html| element.
    qw(body),
  )},
};
252    
## Sectioning roots: namespace URI => {local name => 1}.
my $HTMLSectioningRoot = {
  $HTML_NS => +{map { ($_ => 1) } qw(blockquote datagrid figure td)},
};
258    
## Heading content: namespace URI => {local name => 1}.
my $HTMLHeadingContent = {
  $HTML_NS => +{map { ($_ => 1) } qw(h1 h2 h3 h4 h5 h6 header)},
};
264    
## Phrasing content: namespace URI => {local name => 1}.
## NOTE: All phrasing content is also flow content.
my $HTMLPhrasingContent = {
  $HTML_NS => +{map { ($_ => 1) } (
    qw(br q cite em strong small mark
       dfn abbr time progress meter code
       var samp kbd sub sup span i
       b bdo ruby
       script noscript event-source eventsource
       command bb
       input button label select datalist
       textarea output),
    ## ISSUE: |datagrid| is "Interactive element" in the spec.
    qw(datagrid),
    ## NOTE: |area| is allowed only as a descendant of |map|.
    qw(area),

    ## NOTE: Transparent.
    qw(a ins del font),

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
    qw(menu),

    qw(img iframe embed object video audio
       canvas),
  )},

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## NOTE: And non-inter-element-whitespace text nodes.
};
296    
297 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
298 wakaba 1.29
## Interactive content: namespace URI => {local name => 1}.
my $HTMLInteractiveContent = {
  $HTML_NS => +{map { ($_ => 1) } (
    qw(a),
    qw(label input button select textarea),
    qw(details datagrid bb),

    ## NOTE: When "controls" attribute is specified.
    qw(video audio),

    ## NOTE: When "type=toolbar" attribute is specified.
    qw(menu),
  )},
};
312    
## NOTE: Labelable form-associated elements, as
## namespace URI => {local name => 1}.
my $LabelableFAE = {
  $HTML_NS => +{map { ($_ => 1) } qw(input button select textarea)},
};
319    
320 wakaba 1.130 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
321    
322 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
323     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
324    
325     ## -- Common attribute syntax checkers
326    
327 wakaba 1.1 our $AttrChecker;
328 wakaba 1.82 our $AttrStatus;
329 wakaba 1.1
## Builds a checker for an enumerated attribute.
##
## Argument: a hash reference {keyword => state}, where a positive
## state marks a conforming keyword and any other true state marks a
## known but non-conforming keyword.
##
## The returned checker takes ($self, $attr), lowercases the attribute
## value and reports 'enumerated:non-conforming' or
## 'enumerated:invalid' through $self->{onerror} at the "must" level.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    ## TODO: ASCII case insensitivity?
    my $state = $states->{lc $attr->value};
    return if $state > 0; # conforming keyword

    my $error_type = $state
        ? 'enumerated:non-conforming' # listed, but not conforming
        : 'enumerated:invalid';       # not listed at all
    $self->{onerror}->(node => $attr, type => $error_type,
                       level => $self->{level}->{must});
  };
}; # $GetHTMLEnumeratedAttrChecker
346    
## Builds a checker for a boolean attribute.
##
## Argument: the attribute's local name.  The returned checker takes
## ($self, $attr) and reports 'boolean:invalid' at the "must" level
## unless the lowercased value is empty or equal to that local name.
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = lc $attr->value; ## TODO: case
    return if $value eq '' or $value eq $local_name;

    $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                       level => $self->{level}->{must});
  };
}; # $GetHTMLBooleanAttrChecker
358    
## Unordered set of unique space-separated tokens.
##
## Argument: either |undef| (any token is allowed) or a hash reference
## whose true-valued keys are the allowed tokens.
##
## The returned checker takes ($self, $attr), splits the value on
## space characters and reports, at the "must" level, 'duplicate
## token' for a repeated token and 'word not allowed' for a token
## that is not in the allowed set.
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my $allowed_words = shift;
  return sub {
    my ($self, $attr) = @_;
    my @tokens = grep {length $_}
        split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
    my %seen;
    for my $token (@tokens) {
      if ($seen{$token}++) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
        next;
      }

      ## First occurrence: only the allowed-word check remains.
      next if not defined $allowed_words or $allowed_words->{$token};
      $self->{onerror}->(node => $attr, type => 'word not allowed',
                         value => $token,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
385 wakaba 1.8
## |rel| attribute (set of space separated tokens,
## whose allowed values are defined by the section on link types)
##
## Arguments: ($a_or_area, $todo, $self, $attr, $item, $element_state).
## |$a_or_area| is used as the index into each link type's |effect|
## array.  (Presumably 0 for |link| and 1 for |a|/|area|; confirm
## against Whatpm::_LinkTypeList.)
##
## Side effects: reports problems through $self->{onerror}, records
## unique link types in $self->{has_link_type}, sets
## $todo->{has_hyperlink_link_type}, flags
## $element_state->{uri_info}->{href}->{type} and stores the token set
## in $element_state->{link_rel}.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Reports one problem about the link type token |$value|.
  my $report = sub {
    my ($type, $value, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $value,
                       level => $self->{level}->{$level_name});
  };

  my %word;
  for my $token (grep {length $_}
                 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      ## NOTE: Repeating |up| is explicitly tolerated.
      $report->('duplicate token', $token, 'must');
    }
  }
  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  my $is_hyperlink;
  my $is_resource;
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $link_type (keys %word) {
    my $def = $LinkType->{$link_type};
    unless (defined $def) {
      $report->('unknown link type', $link_type, 'uncertain');
      next;
    }

    my $status = $def->{status};
    my $effect = $def->{effect}->[$a_or_area];
    if ($status eq 'accepted' or $status eq 'proposal') {
      $report->('link type:proposed', $link_type, 'should')
          if $status eq 'proposal';
      $report->('link type:bad context', $link_type, 'must')
          unless defined $effect;
    } else { # rejected or synonym
      $report->('link type:non-conforming', $link_type, 'must');
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$link_type}) {
        $report->('link type:duplicate', $link_type, 'must');
      } else {
        $self->{has_link_type}->{$link_type} = 1;
      }
    }

    ## NOTE: |alternate| is decided after the loop, since its effect
    ## depends on whether |stylesheet| is also specified.
    if (defined $effect and $link_type ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appear they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches the pattern for the rel=pingback
  ## link, which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than
  ## the predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
501 wakaba 1.20
502     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
503 wakaba 1.1
## URI (or IRI)
##
## Arguments: ($self, $attr, $item, $element_state).
##
## Checks the attribute value as an IRI reference, forwarding any
## problem to $self->{onerror} with |node => $attr| appended.  Also
## sets $self->{has_uri_attr}, stores the attribute node in
## $element_state->{uri_info}->{<attribute name>}->{node} and
## registers that same hash under $self->{return}->{uri}->{<value>}.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an
  ## absolute reference with optional fragment identifier.)
  my $value = $attr->value;
  ## NOTE: The error level table is the third argument of
  ## |check_iri_reference|, as in the other callers in this file.  It
  ## used to sit outside the call's parentheses, so it was evaluated
  ## and discarded instead of being passed.
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
520    
## A space separated list of one or more URIs (or IRIs)
##
## Arguments: ($self, $attr).  Every item of the list is checked as
## an IRI reference (errors carry the item as |value| and its position
## as |index|) and registered under $self->{return}->{uri}->{<item>}
## with a type derived from the attribute name.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  my %type_of_attr = (
    ping => 'action',
    profile => 'namespace',
    archive => 'resource',
  );
  my $type = $type_of_attr{$attr->name};

  my $index = 0;
  for my $uri (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    my $onerror = sub {
      $self->{onerror}->(value => $uri, @_, node => $attr, index => $index);
    };
    Whatpm::URIChecker->check_iri_reference ($uri, $onerror, $self->{level});

    ## TODO: absolute
    my $info = {node => $attr, type => {$type => 1}};
    push @{$self->{return}->{uri}->{$uri} ||= []}, $info;
  } continue {
    $index++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a
  ## relative reference, i.e. same as base URI.)
  ## ISSUE: What is "space"?
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
549    
## Pattern for an e-mail address of the form |dot-atom "@" dot-atom|.
## NOTE: The pattern is not anchored; callers have to add |\A| and
## |\z| themselves.
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/;
  qr/$dot_atom\@$dot_atom/;
};
556    
## Valid global date and time.
##
## Argument: the suffix of the |Message::Date| parser method to use
## (the method called is |parse_| followed by that suffix).
##
## The returned checker takes ($self, $attr, $item, $element_state).
## Parser errors other than 'date value not supported' are forwarded
## to $self->{onerror}.  $element_state->{date_value}->{<attribute
## name>} receives the parser result if it is true; otherwise it is
## the empty string when such an error was forwarded, or |undef|.
my $GetDateTimeAttrChecker = sub ($) {
  my $type = shift;
  my $method = 'parse_' . $type;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    require Message::Date;
    my $parser = Message::Date->new;
    $parser->{level} = $self->{level};

    my $range_error;
    $parser->{onerror} = sub {
      my %opt = @_;
      return if $opt{type} eq 'date value not supported';
      $self->{onerror}->(%opt, node => $attr);
      $range_error = '';
    };

    my $parsed = $parser->$method ($attr->value);
    $element_state->{date_value}->{$attr->name} = $parsed || $range_error;
  };
}; # $GetDateTimeAttrChecker
581 wakaba 1.1
## Checks that the attribute value is an optional "-" followed by one
## or more ASCII digits; otherwise reports 'integer:syntax error' at
## the "must" level.  Arguments: ($self, $attr).
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A-?[0-9]+\z/;

  $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                     level => $self->{level}->{must});
}; # $HTMLIntegerAttrChecker
590    
## Builds a checker for a non-negative integer attribute.
##
## Argument: a code reference that receives the numeric value and
## returns true if it is within the allowed range.
##
## The returned checker takes ($self, $attr) and reports, at the
## "must" level, 'nninteger:syntax error' if the value is not a
## sequence of ASCII digits, or 'nninteger:out of range' if the range
## check fails.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $digits = $attr->value;

    my $error_type;
    if ($digits !~ /\A[0-9]+\z/) {
      $error_type = 'nninteger:syntax error';
    } elsif (not $range_check->($digits + 0)) {
      $error_type = 'nninteger:out of range';
    } else {
      return;
    }
    $self->{onerror}->(node => $attr, type => $error_type,
                       level => $self->{level}->{must});
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
608    
## Builds a checker for a floating point number attribute.
##
## Argument: a code reference that receives the numeric value and
## returns true if it is within the allowed range.
##
## The returned checker takes ($self, $attr, $item, $element_state).
## On success the numeric value is stored in
## $element_state->{number_value}->{<attribute name>}; otherwise
## 'float:syntax error' or 'float:out of range' is reported at the
## "must" level.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $text = $attr->value;

    my $is_float = ($text =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                    $text =~ /\A-?\.[0-9]+\z/);
    unless ($is_float) {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error',
                         level => $self->{level}->{must});
      return;
    }

    unless ($range_check->($text + 0)) {
      $self->{onerror}->(node => $attr, type => 'float:out of range',
                         level => $self->{level}->{must});
      return;
    }

    ## TODO: parse algorithm
    $element_state->{number_value}->{$attr->name} = $text + 0;
  };

  ## TODO: scientific notation
}; # $GetHTMLFloatingPointNumberAttrChecker
632    
## Checks a |step|-like attribute value.  Arguments: ($self, $attr).
##
## NOTE: A valid floating point number (> 0), or ASCII
## case-insensitive "any".  Anything else is reported at the "must"
## level as 'float:syntax error'; a number that is not greater than
## zero is reported as 'float:out of range'.
my $StepAttrChecker = sub {
  my ($self, $attr) = @_;
  my $text = $attr->value;

  my $error_type;
  if ($text =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
      $text =~ /\A-?\.[0-9]+\z/) {
    return if $text > 0;
    $error_type = 'float:out of range';
  } elsif ($text =~ /\A[Aa][Nn][Yy]\z/) {
    return;
  } else {
    $error_type = 'float:syntax error';
  }
  $self->{onerror}->(node => $attr, type => $error_type,
                     level => $self->{level}->{must});

  ## TODO: scientific
}; # $StepAttrChecker
655    
## HTML4 %Length;
##
## Arguments: ($self, $attr).  Reports 'length:syntax error' at the
## "must" level unless the value is one or more ASCII digits,
## optionally followed by "%".
##
## NOTE: HTML4 definition is too vague - it does not define the syntax
## of percentage value at all (!).
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A[0-9]+%?\z/;

  $self->{onerror}->(node => $attr, type => 'length:syntax error',
                     level => $self->{level}->{must});
}; # $HTMLLengthAttrChecker
668    
## One or more characters of the class below (printable ASCII with a
## number of punctuation characters left out).
my $MIMEToken
    = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;

## A type name or a subtype name, 1 to 127 characters (RFC 4288).
my $TypeOrSubtype = qr/[A-Za-z0-9!#\$&.+^_-]{1,127}/; # RFC 4288

## |type/subtype| without parameters; captures the type and the
## subtype.  NOTE: Not anchored.
my $IMTNoParameter = qr[($TypeOrSubtype)/($TypeOrSubtype)];
672    
## "A valid MIME type, optionally with parameters. [RFC 2046]"
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
##
## Arguments: ($self, $attr).  The value is split into type, subtype
## and |name => value| parameter pairs (quoted strings are unescaped
## and unquoted), which are handed to |Whatpm::IMTChecker|.  A value
## that does not match the syntax is reported as 'IMT:syntax error'
## at the "must" level.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
  ## ISSUE: RFC 2231 extension? Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;

  my ($type, $subtype, $params) = $attr->value =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#;
  unless (defined $type) {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                       level => $self->{level}->{must});
    return;
  }

  my @type = ($type, $subtype);
  while ($params =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
    my ($name, $token, $quoted) = ($1, $2, $3);
    if (defined $token) {
      push @type, $name => $token;
    } else {
      ## Resolve backslash escapes, then drop the surrounding quotes.
      $quoted =~ s/\\(.)/$1/gs;
      push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
    }
  }

  require Whatpm::IMTChecker;
  my $ic = Whatpm::IMTChecker->new;
  $ic->{level} = $self->{level};
  $ic->check_imt (sub {
    $self->{onerror}->(@_, node => $attr);
  }, @type);
}; # $HTMLIMTAttrChecker
709    
## Checks the attribute value as an RFC 3066 language tag by means of
## |Whatpm::LangTag|; any problem is forwarded to $self->{onerror}
## with |node => $attr| appended.  Arguments: ($self, $attr).
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  my $onerror = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag
      ($attr->value, $onerror, $self->{level});
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
723    
## "A valid media query [MQ]"
##
## Arguments: ($self, $attr).  No validation is performed; every
## value is reported as 'media query' at the "uncertain" level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my $level = $self->{level}->{uncertain};
  $self->{onerror}->(node => $attr, type => 'media query', level => $level);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
732    
## Event handler content attribute.
##
## Arguments: ($self, $attr).  No validation is performed; every
## value is reported as 'event handler' at the "uncertain" level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my $level = $self->{level}->{uncertain};
  $self->{onerror}->(node => $attr, type => 'event handler', level => $level);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
744    
## |form| attribute.  Arguments: ($self, $attr).
##
## Queues ['form', <attribute value>, <attribute node>] on
## @{$self->{idref}}; nothing is reported here.
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: MUST be the ID of a |form| element.

  my @idref = ('form', $attr->value, $attr);
  push @{$self->{idref}}, \@idref;

  ## ISSUE: <form id=""><input form=""> (empty ID)?
}; # $HTMLFormAttrChecker
755    
## |list| attribute.  Arguments: ($self, $attr).
##
## Queues ['datalist', <attribute value>, <attribute node>] on
## @{$self->{idref}}; nothing is reported here.
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: MUST be the ID of a |datalist| element.

  my @idref = ('datalist', $attr->value, $attr);
  push @{$self->{idref}}, \@idref;

  ## TODO: Warn violation to control-dependent restrictions. For
  ## example, |<input type=url maxlength=10 list=a> <datalist
  ## id=a><option value=nonurlandtoolong></datalist>| should be
  ## warned.
}; # $ListAttrChecker
768    
## |pattern| attribute.  Arguments: ($self, $attr).
##
## Hands the attribute value to $self->{onsubdoc} as a character
## string subdocument of type |text/x-regexp-js|, with the attribute
## node as its container.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;
  my %subdoc = (
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-regexp-js',
    is_char_string => 1,
  );
  $self->{onsubdoc}->(\%subdoc);

  ## ISSUE: "value must match the Pattern production of ECMA 262's
  ## grammar" - no additional constraints (e.g. {n,m} then n>=m).

  ## TODO: Warn if @value does not match @pattern.
}; # $PatternAttrChecker
781    
## Attribute checker for |accept| attributes.
##
## The value is lowercased (ASCII only) and split on commas.  A token
## that repeats an earlier one is reported as 'duplicate token'.  The
## three wildcard types are accepted as is; a token matching
## |$IMTNoParameter| is passed to |Whatpm::IMTChecker|; anything else is
## reported as 'IMTnp:syntax error'.
my $AcceptAttrChecker = sub {
  my ($self, $attr) = @_;

  (my $list = $attr->value) =~ tr/A-Z/a-z/; ## ASCII case-insensitive

  ## |split| returns no item for the empty string, so an empty value is
  ## treated as a single empty token.
  my @token = ('');
  @token = split /,/, $list, -1 if length $list;

  my %is_wildcard = map { ($_ => 1) } 'audio/*', 'video/*', 'image/*';
  my %seen;
  for my $token (@token) {
    if ($seen{$token}++) {
      $self->{onerror}->(node => $attr,
                         type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
      next;
    }

    if ($is_wildcard{$token}) {
      ## Nothing more to check.
    } elsif ($token =~ m[\A$IMTNoParameter\z]) {
      ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
      ## define its own syntax citing RFC 4288.

      ## NOTE: Parameters not allowed.
      require Whatpm::IMTChecker;
      my $imt_checker = Whatpm::IMTChecker->new;
      $imt_checker->{level} = $self->{level};
      my $forward_error = sub {
        $self->{onerror}->(@_, node => $attr);
      };
      $imt_checker->check_imt ($forward_error, $1, $2);
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'IMTnp:syntax error', ## TODOC: type
                         value => $token,
                         level => $self->{level}->{must});
    }
  }
}; # $AcceptAttrChecker
820    
## Attribute checker for form control |name| attributes.  The only
## requirement checked is that the value is not empty.
my $FormControlNameAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: No uniqueness constraint.
  return if length $attr->value;

  $self->{onerror}->(node => $attr,
                     type => 'empty control name', ## TODOC: type
                     level => $self->{level}->{must});
}; # $FormControlNameAttrChecker
832    
## Attribute checker for |autofocus| attributes.  The value is checked
## as a boolean attribute first; then a 'duplicate autofocus' error is
## reported if |$self->{has_autofocus}| was already set by an earlier
## invocation.
my $AutofocusAttrChecker = sub {
  my ($self, $attr) = @_;

  my $check_boolean = $GetHTMLBooleanAttrChecker->('autofocus');
  $check_boolean->(@_);

  $self->{onerror}->(node => $attr,
                     type => 'duplicate autofocus', ## TODOC: type
                     level => $self->{level}->{must})
      if $self->{has_autofocus};
  $self->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
845    
## Attribute checker for |usemap| attributes.
##
## MUST be a valid hash-name reference to a |map| element.  A value
## starting with "#" is queued (without the "#") in |$self->{usemap}|;
## any other value is reported as 'hashref:syntax error'.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;

  if ($attr->value =~ /^#(.*)\z/s) {
    ## NOTE: |usemap="#"| is conforming, though it identifies no |map|
    ## element according to the "rules for parsing a hash-name
    ## reference" algorithm.  The document is non-conforming anyway,
    ## since |<map name="">| (empty name) is non-conforming.
    my $map_name = $1;
    push @{$self->{usemap}}, [$map_name => $attr];
  } else {
    $self->{onerror}->(node => $attr, type => 'hashref:syntax error',
                       level => $self->{level}->{must});
  }

  ## NOTE: Space characters in hash-name references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported
}; # $HTMLUsemapAttrChecker
863    
## Valid browsing context name
##
## An empty value is reported as 'window name:empty' and a value
## starting with "_" as 'window name:reserved'; anything else is
## accepted.
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($self, $attr) = @_;
  my $name = $attr->value;

  if ($name =~ /^_/) {
    $self->{onerror}->(node => $attr, type => 'window name:reserved',
                       level => $self->{level}->{must},
                       value => $name);
    return;
  }

  return if length $name;

  $self->{onerror}->(node => $attr, type => 'window name:empty',
                     level => $self->{level}->{must});
}; # $HTMLBrowsingContextNameAttrChecker
879    
## Valid browsing context name or keyword
##
## Attribute checker for |target|-like attributes.  A value starting
## with "_" must be one of the keywords |_blank|, |_self|, |_parent| or
## |_top| (compared ASCII case-insensitively), otherwise it is reported
## as 'window name:reserved' (with the lowercased value).  An empty
## value is reported as 'window name:empty'.  Any other value is
## accepted.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    ## |tr| is used instead of |lc| so that only ASCII letters are
    ## folded.  |lc| applies Unicode case mapping, which would turn
    ## e.g. U+212A KELVIN SIGN into "k" and make a non-keyword such as
    ## "_blan\x{212A}" look like |_blank|.
    $value =~ tr/A-Z/a-z/; ## ISSUE: ASCII case-insensitive?
    unless ({
      _blank => 1, _self => 1, _parent => 1, _top => 1,
    }->{$value}) {
      $self->{onerror}->(node => $attr,
                         type => 'window name:reserved',
                         level => $self->{level}->{must},
                         value => $value);
    }
  } elsif (length $value) {
    ## A browsing context name; nothing more to check.
  } else {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  }
}; # $HTMLTargetAttrChecker
901    
## Attribute checker for attributes whose value is a group of
## selectors.  The value is parsed by |Whatpm::CSS::SelectorsParser|
## with a fixed set of supported pseudo-classes and pseudo-elements;
## parse errors are forwarded to |$self->{onerror}| with the attribute
## as the node.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  my $selectors = $attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  my @pseudo_class = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  @{$parser->{pseudo_class}}{@pseudo_class} = (1) x @pseudo_class;

  my @pseudo_element = qw/
    after before first-letter first-line
  /;
  @{$parser->{pseudo_element}}{@pseudo_element} = (1) x @pseudo_element;

  $parser->{level} = $self->{level};
  $parser->{onerror} = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
929    
## Checks a single character encoding name.
##
## Arguments: the encoding name, the checker object, the attribute node
## that errors are attached to and, optionally, a flag requesting that
## the encoding be ASCII-compatible.
##
## Returns a two-item list: the entry found for the name in
## |$Message::Charset::Info::IANACharset| (or a false value) and the
## ASCII-lowercased name.
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($charset_value, $self, $attr, $ascii_compat) = @_;

  ## NOTE: This code is used for |charset=""| attributes, |charset=|
  ## portion of the |content=""| attributes, and |accept-charset=""|
  ## attributes.

  ## NOTE: Though the case-sensitivity of |charset| attribute value
  ## is not explicitly spelled in the HTML5 spec, the Character Set
  ## registry of IANA, which is referenced from HTML5 spec, says that
  ## charset name is case-insensitive.
  $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

  require Message::Charset::Info;
  my $charset = $Message::Charset::Info::IANACharset->{$charset_value};

  ## All reports of this checker share the same shape.
  my $report = sub {
    my ($type, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $charset_value,
                       level => $self->{level}->{$level_name});
  };

  ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
  ## Syntactically valid and registered?  What about x-charset names?
  $report->('charset:syntax error', 'must')
      unless Message::Charset::Info::is_syntactically_valid_iana_charset_name
          ($charset_value);

  ## The error type used whenever the name is not a registered one.
  my $unregistered_type = $charset_value =~ /^x-/
      ? 'charset:private' : 'charset:not registered';

  unless ($charset) {
    ## TODO: non-preferred-name error for these cases.
    $report->($unregistered_type, 'good');

    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    return ($charset, $charset_value);
  }

  ## ISSUE: What is "the preferred name for that encoding" (for a charset
  ## with no "preferred MIME name" label)?
  my $name_status = $charset->{iana_names}->{$charset_value} || 0;

  my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
  $report->('charset:not preferred', 'must')
      unless ($name_status & $preferred) == $preferred;

  my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();
  $report->($unregistered_type, 'good')
      unless ($name_status & $registered) == $registered;

  if ($ascii_compat and
      not ($charset->{category} &
           Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ())) {
    $report->('charset:not ascii compat', 'must');
  }

  return ($charset, $charset_value);
}; # $HTMLCharsetChecker
1018    
## NOTE: "An ordered set of space-separated tokens" where "each token
## MUST be the preferred name of an ASCII-compatible character
## encoding".
##
## Splits the attribute value on space characters and runs
## |$HTMLCharsetChecker| on every non-empty token with the
## ASCII-compatibility flag set.
my $HTMLCharsetsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: "ordered set of space-separated tokens" is not defined.

  my @token = grep { length $_ }
      split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

  ## XXX
  ## ISSUE: Uniqueness is not enforced.

  foreach my $token (@token) {
    $HTMLCharsetChecker->($token, $self, $attr, 1);
  }

  ## ISSUE: Shift_JIS is ASCII-compatible?  What about ISO-2022-JP?
}; # $HTMLCharsetsAttrChecker
1038    
## Attribute checker for HTML4 "color" (|%Color;|) values: "#" followed
## by hexadecimal digits, or one of the sixteen color names, compared
## case-insensitively.  Anything else is reported as
## 'color:syntax error' at the |html4_fact| level.
my $HTMLColorAttrChecker = sub {
  my ($self, $attr) = @_;

  ## TODO: HTML4 has some guideline on usage of color.

  my $color = $attr->value;
  return if $color =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i;

  $self->{onerror}->(node => $attr, type => 'color:syntax error',
                     level => $self->{level}->{html4_fact});
}; # $HTMLColorAttrChecker
1053    
## Attribute checker shared by the |ref| and |template| attributes.
##
## The value is first checked by |$HTMLURIAttrChecker|.  A |ref|
## attribute on an element without a |template| attribute is reported
## as 'attribute not allowed'.  If the value, resolved against the
## document URL, points into the document itself, then either the
## fragment identifier is queued in |$self->{ref}| / |$self->{template}|
## or, for a |template| attribute without fragment, the root element
## must be an HTML |datatemplate| element.
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($self, $attr) = @_;
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $attr->name;

  if ($attr_name eq 'ref' and
      not $attr->owner_element->has_attribute_ns (undef, 'template')) {
    $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                       level => $self->{level}->{must});
  }

  require Message::URL;
  my $doc = $attr->owner_document;
  my $doc_uri = $doc->document_uri;
  my $uri = Message::URL->new_abs ($attr->value, $doc_uri);
  my $no_frag_uri = $uri->clone;
  $no_frag_uri->uri_fragment (undef);

  my $is_same_document = defined $doc_uri
      ? $doc_uri eq $no_frag_uri
      : $no_frag_uri eq '';
  unless ($is_same_document) {
    ## TODO: An external document is referenced.
    ## The document MUST be an HTML or XML document.
    ## If there is a fragment identifier, it MUST point a part of the doc.
    ## If the attribute is |template|, the pointed part MUST be a
    ## |datatemplate| element.
    ## If no fragment identifier is specified, the root element MUST be
    ## a |datatemplate| element when the attribute is |template|.
    return;
  }

  my $fragid = $uri->uri_fragment;
  if (defined $fragid) {
    push @{$self->{$attr_name}}, [$fragid => $attr];
    return;
  }

  ## No fragment identifier: only |template| has a requirement, namely
  ## that the root element is an HTML |datatemplate| element.
  return unless $attr_name eq 'template';

  my $docel = $doc->document_element;
  if ($docel) {
    my $nsuri = $docel->namespace_uri;
    return if defined $nsuri and $nsuri eq $HTML_NS and
        $docel->manakai_local_name eq 'datatemplate';
  }

  $self->{onerror}->(node => $attr, type => 'template:not template',
                     level => $self->{level}->{must});
}; # $HTMLRefOrTemplateAttrChecker
1106    
## Attribute checker shared by |repeat-min|, |repeat-max| and
## |repeat-start|.
##
## A namespaced attribute on an element in the HTML namespace is
## reported as 'attribute not allowed'.  The value is then checked as a
## non-negative integer by |$GetHTMLNonNegativeIntegerAttrChecker|.
my $HTMLRepeatIndexAttrChecker = sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## Both conditions have to hold.  With |or| the error was raised
    ## for owner elements in any namespace, and |eq| was applied to an
    ## undefined value for owner elements in no namespace.
    ## NOTE(review): presumably the namespaced form is meant for
    ## non-HTML elements only (Web Forms 2.0) - confirm.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1121    
## Attribute checker for |placeholder| attributes.  A value containing
## U+000A or U+000D is reported as 'newline in value'.
my $PlaceholderAttrChecker = sub {
  my ($self, $attr) = @_;

  return unless $attr->value =~ /[\x0D\x0A]/;

  $self->{onerror}->(node => $attr,
                     type => 'newline in value', ## TODOC: type
                     level => $self->{level}->{must});
}; # $PlaceholderAttrChecker
1130    
## Checkers for the global attributes of HTML elements, keyed by
## attribute local name.  Each checker is invoked as
## |$checker->($self, $attr, $item, $element_state)| and reports
## problems through |$self->{onerror}|.
my $HTMLAttrChecker = {
  accesskey => sub {
    my ($self, $attr) = @_;

    ## "Ordered set of unique space-separated tokens"

    my %keys;
    my @keys = grep {length} split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

    for my $key (@keys) {
      unless ($keys{$key}) {
        $keys{$key} = 1;
        ## Each token must be exactly one character.
        if (length ($key) != 1) {
          $self->{onerror}->(node => $attr, type => 'char:syntax error',
                             value => $key,
                             level => $self->{level}->{must});
        }
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $key,
                           level => $self->{level}->{must});
      }
    }
  }, # accesskey

  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]
  id => sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    if (length ($value) > 0) {
      if ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID',
                           level => $self->{level}->{must});
        push @{$self->{id}->{$value}}, $attr;
      } else {
        ## First occurrence: remember the node and the kind of element
        ## the ID belongs to.
        $self->{id}->{$value} = [$attr];
        $self->{id_type}->{$value} = $element_state->{id_type} || '';
      }
      if ($value =~ /[\x09\x0A\x0C\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID',
                           level => $self->{level}->{must});
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value',
                         level => $self->{level}->{must});
    }
  },
  title => sub {}, ## NOTE: No conformance criteria
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value eq '') {
      ## The empty string is accepted without further checks.
    } else {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        $self->{onerror}->(@_, node => $attr);
      }, $self->{level});
    }
    ## ISSUE: RFC 4646 (3066bis)?

    ## TODO: test data

    ## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
    ## non-conforming.  Such errors are detected by the checkers of
    ## |{}xml:lang| and |{xml}:lang| attributes.
  },
  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
  class => sub {
    my ($self, $attr) = @_;

    ## NOTE: "Unordered set of unique space-separated tokens".

    my %word;
    for my $word (grep {length $_}
                  split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      unless ($word{$word}) {
        $word{$word} = 1;
        push @{$self->{return}->{class}->{$word}||=[]}, $attr;
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $word,
                           level => $self->{level}->{must});
      }
    }
  },
  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),
  contextmenu => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    push @{$self->{idref}}, ['menu', $value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },
  hidden => $GetHTMLBooleanAttrChecker->('hidden'),
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ref => $HTMLRefOrTemplateAttrChecker,
  registrationmark => sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## NOTE: Any value is conforming.

    if ($self->{flag}->{in_rule}) {
      my $el = $attr->owner_element;
      my $ln = $el->manakai_local_name;
      if ($ln eq 'nest' or
          ($ln eq 'rule' and not $element_state->{in_rule_original})) {
        my $nsuri = $el->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    } else {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  },
  repeat => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## Both conditions have to hold.  With |or| the error was raised
      ## for owner elements in any namespace, and |eq| was applied to
      ## an undefined value for owner elements in no namespace.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    my $value = $attr->value;
    if ($value eq 'template') {
      ## The keyword is allowed.
    } elsif ($value =~ /\A-?[0-9]+\z/) {
      ## An integer is allowed.
    } else {
      $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "Repetition templates may occur anywhere."  Does that mean
    ## that the attribute MAY be specified to any element, or that the
    ## element with that attribute (i.e. a repetition template) can be
    ## inserted anywhere in a document tree?
  },
  'repeat-min' => $HTMLRepeatIndexAttrChecker,
  'repeat-max' => $HTMLRepeatIndexAttrChecker,
  'repeat-start' => $HTMLRepeatIndexAttrChecker,
  'repeat-template' => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## See the |repeat| checker for why this is |and|, not |or|.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## ISSUE: This attribute has no conformance requirement.
    ## ISSUE: Repetition blocks MAY have this attribute.  Then, is the
    ## attribute allowed on an element that is not a repetition block?
  },
  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
  spellcheck => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),
  style => sub {
    my ($self, $attr) = @_;

    ## The declarations are checked as a subdocument.
    $self->{onsubdoc}->({s => $attr->value,
                         container_node => $attr,
                         media_type => 'text/x-css-inline',
                         is_char_string => 1});

    ## NOTE: "... MUST still be comprehensible and usable if those
    ## attributes were removed" is a semantic requirement, it cannot
    ## be tested.
  },
  tabindex => $HTMLIntegerAttrChecker,
  template => $HTMLRefOrTemplateAttrChecker,
  'xml:lang' => sub {
    my ($self, $attr) = @_;

    if ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(type => 'in HTML:xml:lang',
                         level => $self->{level}->{info},
                         node => $attr);
      ## NOTE: This is not an error, but the attribute will be ignored.
    } else {
      $self->{onerror}->(type => 'in XML:xml:lang',
                         level => $self->{level}->{html5_no_may},
                         node => $attr);
      ## TODO: We need to add test for this error.
    }

    ## The attribute is only allowed together with a |lang| attribute
    ## that has the same value (ASCII case-insensitively).
    my $lang_attr = $attr->owner_element->get_attribute_node_ns
        (undef, 'lang');
    if ($lang_attr) {
      my $lang_attr_value = $lang_attr->value;
      $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      if ($lang_attr_value ne $value) {
        $self->{onerror}->(type => 'xml:lang ne lang',
                           level => $self->{level}->{must},
                           node => $attr);
      }
    } else {
      $self->{onerror}->(type => 'xml:lang not allowed',
                         level => $self->{level}->{must},
                         node => $attr);
      ## TODO: We need to add test for <x {xml}:lang {}xml:lang>.
    }
  },
  xmlns => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                         level => $self->{level}->{must});
      ## TODO: Should be new "bad namespace" error?
    }
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                         level => $self->{level}->{must});
      ## TODO: Test
    }

    ## TODO: Should be resolved?
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {namespace => 1}};
  },
};
1371    
1372 wakaba 1.79 ## ISSUE: Shouldn't the same-origin policy be applied to the datatemplate feature?
1373    
## Feature status of the global attributes of HTML elements, keyed by
## attribute local name.
my %HTMLAttrStatus = (
  'accesskey'        => FEATURE_HTML5_FD,
  'class'            => FEATURE_HTML5_LC,
  'contenteditable'  => FEATURE_HTML5_REC,
  'contextmenu'      => FEATURE_HTML5_WD,
  'dir'              => FEATURE_HTML5_REC,
  'draggable'        => FEATURE_HTML5_LC,
  'hidden'           => FEATURE_HTML5_LC,
  'id'               => FEATURE_HTML5_REC,
  'irrelevant'       => FEATURE_HTML5_DROPPED,
  'lang'             => FEATURE_HTML5_REC,
  'ref'              => FEATURE_HTML5_AT_RISK,
  'registrationmark' => FEATURE_HTML5_AT_RISK,
  'repeat'           => FEATURE_WF2,
  'repeat-max'       => FEATURE_WF2,
  'repeat-min'       => FEATURE_WF2,
  'repeat-start'     => FEATURE_WF2,
  'repeat-template'  => FEATURE_WF2,
  'role'             => 0,
  'spellcheck'       => FEATURE_HTML5_WD,
  'style'            => FEATURE_HTML5_REC,
  'tabindex'         => FEATURE_HTML5_DEFAULT,
  'template'         => FEATURE_HTML5_AT_RISK,
  'title'            => FEATURE_HTML5_REC,
  'xmlns'            => FEATURE_HTML5_WD,
);
1400    
## Feature status of the attributes that are common attributes in both
## HTML5 and XHTML Modularization 1.0, plus the RDFa attributes.
my %HTMLM12NCommonAttrStatus = (
  ## RDFa
  (map { ($_ => FEATURE_RDFA_REC) } qw/
    about content datatype href property rel resource rev typeof
  /),
  instanceof => FEATURE_RDFA_LC_DROPPED,

  ## Core and I18N
  class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  dir => FEATURE_HTML5_REC,
  id => FEATURE_HTML5_REC,
  title => FEATURE_HTML5_REC,

  ## Intrinsic events
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
    onclick ondblclick onmousedown onmouseup onmouseover onmousemove
    onmouseout onkeypress onkeydown onkeyup
  /),

  ## Style
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_M12N10_REC,
  style => FEATURE_HTML5_REC,
);
1430    
## Feature status of the XHTML2 common attributes, grouped by the
## XHTML2 attribute collection they belong to.
my %XHTML2CommonAttrStatus = (
  ## Core
  class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  id => FEATURE_HTML5_REC,
  #xml:id
  layout => FEATURE_XHTML2_ED,
  title => FEATURE_HTML5_REC,

  ## Hypertext
  (map { ($_ => FEATURE_XHTML2_ED) } qw/
    cite href hreflang hrefmedia hreftype nextfocus prevfocus target
  /),
  #xml:base

  ## I18N
  #xml:lang

  ## Bi-directional
  dir => FEATURE_HTML5_REC,

  ## Edit
  (map { ($_ => FEATURE_XHTML2_ED) } qw/edit datetime/),

  ## Embedding
  (map { ($_ => FEATURE_XHTML2_ED) } qw/encoding src srctype/),

  ## Image Map
  (map { ($_ => FEATURE_XHTML2_ED) } qw/usemap ismap shape coords/),

  ## Media
  media => FEATURE_XHTML2_ED,

  ## Metadata
  (map { ($_ => FEATURE_XHTML2_ED) } qw/
    about content datatype instanceof property rel resource rev
  /),

  ## Role
  role => FEATURE_XHTML2_ED,

  ## Style
  style => FEATURE_HTML5_REC,
);
1490    
## Union of |%HTMLM12NCommonAttrStatus| and |%XHTML2CommonAttrStatus|.
## The entries listed after the two tables replace the values taken
## from them.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  ## RDFa
  (map { ($_ => FEATURE_RDFA_REC | FEATURE_XHTML2_ED) } qw/
    about content datatype property rel resource rev
  /),
  href => FEATURE_RDFA_REC,
  instanceof => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,
  typeof => FEATURE_RDFA_REC,

  ## Core and I18N
  class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  dir => FEATURE_HTML5_REC,
  id => FEATURE_HTML5_REC,
  title => FEATURE_HTML5_REC,

  ## Style
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  style => FEATURE_HTML5_REC,
);
1513    
## Register the event handler content attributes as global attributes.
## Every one of them uses |$HTMLEventHandlerAttrChecker|; only the
## feature status differs between the two groups.
for my $group (
  [FEATURE_HTML5_DEFAULT, qw/
    onabort onbeforeunload onblur onchange onclick oncontextmenu
    ondblclick ondrag ondragend ondragenter ondragleave ondragover
    ondragstart ondrop onerror onfocus onkeydown onkeypress
    onkeyup onload onmousedown onmousemove onmouseout
    onmouseover onmouseup onmousewheel onresize onscroll onselect
    onstorage onsubmit onunload
  /],
  [FEATURE_HTML5_DROPPED, qw/
    ondataunavailable
    onmessage
  /],
) {
  my ($status, @attr_name) = @$group;
  for my $attr_name (@attr_name) {
    $HTMLAttrChecker->{$attr_name} = $HTMLEventHandlerAttrChecker;
    $HTMLAttrStatus{$attr_name} = $status;
  }
}
1533    
## NOTE: Non-standard global attributes in the HTML namespace.
$AttrChecker->{$HTML_NS}->{''} = sub {}; # no syntactical checks
$AttrStatus->{$HTML_NS}->{''} = 0; # disallowed and not part of any standard

## Attributes in the HTML namespace that are known to some specification.
$AttrStatus->{$HTML_NS}->{active} = FEATURE_HTML5_DROPPED;

## Web Forms 2.0 repetition attributes (these also have checkers).
for my $attr_name (qw/repeat repeat-max repeat-min repeat-start
                      repeat-template/) {
  $AttrChecker->{$HTML_NS}->{$attr_name} = $HTMLAttrChecker->{$attr_name};
  $AttrStatus->{$HTML_NS}->{$attr_name} = FEATURE_WF2;
}

## RDFa and role attributes.
$AttrStatus->{$HTML_NS}->{$_} = FEATURE_RDFA_REC | FEATURE_XHTML2_ED
    for qw/about content datatype property rel resource rev/;
$AttrStatus->{$HTML_NS}->{instanceof}
    = FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED;
$AttrStatus->{$HTML_NS}->{typeof} = FEATURE_RDFA_REC;
$AttrStatus->{$HTML_NS}->{role} = FEATURE_ROLE_LC;

## XHTML2 common attributes.
$AttrStatus->{$HTML_NS}->{$_} = FEATURE_XHTML2_ED
    for qw/cite coords datetime edit encoding href hreflang hrefmedia
           hreftype ismap layout media nextfocus prevfocus shape src
           srctype style target usemap/;

## Attributes common to XHTML Modularization 1.1 and XHTML2.
$AttrStatus->{$HTML_NS}->{$_} = FEATURE_M12N11_LC | FEATURE_XHTML2_ED
    for qw/class dir id title/;

## XHTML Modularization 1.1 intrinsic events.
$AttrStatus->{$HTML_NS}->{$_} = FEATURE_M12N11_LC
    for qw/onclick ondblclick onmousedown onmouseup onmouseover
           onmousemove onmouseout onkeypress onkeydown onkeyup/;
1562    
## Checker and feature status used for |data-*| attributes.  The
## checker deliberately does nothing.
my $HTMLDatasetAttrChecker = sub {
  ## NOTE: "Authors should ... when the attributes are ignored and
  ## any associated CSS dropped, the page is still usable."  (semantic
  ## constraint.)
  return;
}; # $HTMLDatasetAttrChecker

my $HTMLDatasetAttrStatus = FEATURE_HTML5_LC;
1570 wakaba 1.73
## Factory for |check_attrs| callbacks of HTML elements.
##
## Arguments:
##   1. Hash reference: null-namespace attribute name => checker code
##      reference specific to the element.
##   2. Hash reference: null-namespace attribute name => status bits.
##
## Returns a code reference that takes ($self, $item, $element_state),
## runs the appropriate checker for every attribute of |$item->{node}|
## and reports each attribute's status via |_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;

  return sub {
    my ($self, $item, $element_state) = @_;

    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' if not defined $ns;
      my $ln = $attr->manakai_local_name;
      my $in_null_ns = ($ns eq '');

      my ($checker, $status);
      if ($in_null_ns) {
        if ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
            $ln !~ /[A-Z]/) {
          ## A |data-*| attribute (no uppercase letter in its name).
          ($checker, $status)
              = ($HTMLDatasetAttrChecker, $HTMLDatasetAttrStatus);
        } else {
          ## Element-specific definition first, then the HTML global
          ## attribute checkers.
          $checker = $element_specific_checker->{$ln}
              || $HTMLAttrChecker->{$ln};
          $status = $element_specific_status->{$ln};
        }
      }

      ## Fall back to the per-namespace tables, whose |''| entry is the
      ## default for the namespace.
      unless ($checker) {
        $checker = $AttrChecker->{$ns}->{$ln} || $AttrChecker->{$ns}->{''};
      }
      unless ($status) {
        $status = $AttrStatus->{$ns}->{$ln} || $AttrStatus->{$ns}->{''};
      }
      if (length $ns and not defined $status) {
        $status = FEATURE_ALLOWED;
      }

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif (not $in_null_ns or $element_specific_status->{$ln}) {
        ## Either a namespaced attribute nobody knows about, or a
        ## null-namespace attribute that has a status but no checker.
        ## (A null-namespace attribute with neither is not reported
        ## here.)
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the
        ## spec
      }

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1612    
## Base element checker for HTML elements: the generic "any" checker
## plus HTML attribute checking and the |uri_info| entries for the
## |template| and |ref| attributes.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Mark both attributes' URLs as being of type "resource".
    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
);
1623    
## Checker for elements whose content model is empty: any child element
## (unless explicitly added via |plus_elements|) and any significant
## text is an error.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } else {
      $error_type = 'element not allowed:empty';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{level}->{must})
        if $has_significant;
  },
);
1651    
## Checker for elements whose content model is text only: any child
## element is an error unless it was explicitly added via
## |plus_elements|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } else {
      $error_type = 'element not allowed:text';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  },
);
1670    
## Checker for elements whose content model is flow content.
##
## |$element_state->{has_non_style}| records that something other than
## a |style| element has already been seen, since a |style| child is
## accepted only before any other content and only with the |scoped|
## attribute.
my %HTMLFlowContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      $error_type = 'element not allowed:flow style'
          if $element_state->{has_non_style} or
             not $child_el->has_attribute_ns (undef, 'scoped');
    } elsif ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
    } else {
      $element_state->{has_non_style} = 1;
      $error_type = 'element not allowed:flow';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: A modified copy of the code below is in |datagrid| checker.
    if ($element_state->{has_significant}) {
      ## Significant content propagates to the real parent.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      ## A non-transparent element without significant content.
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
);
1719    
## Checker for elements whose content model is phrasing content.
##
## NOTE: The definition for |li| assumes that the only differences
## between flow and phrasing content checkers are |check_child_element|
## and |check_child_text|.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      ## Phrasing content is what this content model accepts.
    } else {
      $error_type = 'element not allowed:phrasing';
    }

    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  },
  ## The end-of-element check is shared with the flow content checker.
  check_end => $HTMLFlowContentChecker{check_end},
);
1745    
## Transparent elements currently use a copy of the flow content
## checker's callbacks.
## ISSUE: Should the significant content rule be applied to a
## transparent element that has a parent?
my %HTMLTransparentChecker = (%HTMLFlowContentChecker);
1749 wakaba 1.40
our ($Element, $ElementDefault);

## Definition used for elements in the HTML namespace that have no
## entry of their own (keyed by the empty local name): the plain HTML
## checker.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
1756    
## The |html| element: at most one |head| followed by one |body|, no
## significant text.  |$element_state->{phase}| tracks the progress
## through the content model and is one of 'before head',
## 'after head' or 'after body'.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    version => sub {
      ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
      ## Though DTDs of various versions of HTML define the attribute
      ## as |#FIXED|, this conformance checker does no check for
      ## the attribute value, since what kind of check should be done
      ## is unknown.
    },
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    manifest => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';

    ## Mark the URLs of these attributes as being of type "resource".
    for my $attr_name (qw/manifest template ref/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## True if the child is the HTML element with the given local name.
    my $child_is = sub {
      return ($child_nsuri eq $HTML_NS and $child_ln eq $_[0]);
    };
    ## Reports a must-level error on the child; extra key/value pairs
    ## are inserted between |type| and |level|.
    my $report = sub {
      my ($type, @extra) = @_;
      $self->{onerror}->(node => $child_el,
                         type => $type,
                         @extra,
                         level => $self->{level}->{must});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $report->('element not allowed:minus');
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($element_state->{phase} eq 'before head') {
      if ($child_is->('head')) {
        $element_state->{phase} = 'after head';
      } elsif ($child_is->('body')) {
        ## |body| without a preceding |head|.
        $report->('ps element missing', text => 'head');
        $element_state->{phase} = 'after body';
      } else {
        $report->('element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after head') {
      if ($child_is->('body')) {
        $element_state->{phase} = 'after body';
      } else {
        $report->('element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after body') {
      $report->('element not allowed');
    } else {
      die "check_child_element: Bad |html| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Work out which required children never appeared.
    my @missing;
    if ($element_state->{phase} eq 'after body') {
      ## Both |head| and |body| have been seen.
    } elsif ($element_state->{phase} eq 'before head') {
      @missing = qw/head body/;
    } elsif ($element_state->{phase} eq 'after head') {
      @missing = qw/body/;
    } else {
      die "check_end: Bad |html| phase: $element_state->{phase}";
    }

    for my $name (@missing) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => $name,
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1861 wakaba 1.25
## The |head| element: metadata content only, exactly one |title|, no
## |style scoped| and no significant text.
##
## While the children of |head| are processed, |$self->{flag}->{in_head}|
## is set; the value the flag had before is kept in
## |$element_state->{in_head_original}| and put back in |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is accepted.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## |style scoped| is not allowed in |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{level}->{must});
    }

    ## Remember the outer value of the |in_head| flag only the first
    ## time a child is seen.  Saving it again for later children would
    ## store the value 1 set below, so that |check_end| could never
    ## restore the flag to what it was before |head|.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'title',
                         level => $self->{level}->{must});
    }

    ## Put the flag back only if it was changed, i.e. if |head| had at
    ## least one child element; otherwise leave it untouched.
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1936    
## The |title| element: text-only content, global attributes only.
$Element->{$HTML_NS}->{title} = do {
  ## Status bits of the attributes allowed on |title|.
  my %attr_status = (
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLTextChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
1950 wakaba 1.1
## The |base| element: empty content; at most one per document; at
## least one of |href| and |target| is required.  |href| must precede
## any URL attribute and |target| must precede any hyperlink element,
## as tracked by |$self->{has_uri_attr}| and
## |$self->{has_hyperlink_element}|.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    ## Reports a must-level error of the given type on the element.
    my $report = sub {
      my $type = shift;
      $self->{onerror}->(node => $base_el,
                         type => $type,
                         level => $self->{level}->{must});
    };

    if ($self->{has_base}) {
      ## A second |base| in the same document.
      $report->('element not allowed:base');
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely,
      ## though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $report->('basehref after URL attribute');
    }

    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s
      ## content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $report->('basetarget after hyperlink');
    }

    unless ($has_href or $has_target) {
      $report->('attribute missing:href|target');
    }

    $element_state->{uri_info}->{href}->{type}->{base} = 1;

    ## The generic attribute checker is built on each call and run
    ## immediately.
    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    });
    return $attrs_checker->($self, $item, $element_state);
  },
};
2015    
## The |link| element: empty content; |href| and |rel| are required;
## |sizes| is allowed only with the |icon| link type; an alternate
## style sheet needs a non-empty |title|.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $link_el = $item->{node};

    ## Set by the |sizes| checker if the attribute is present.
    my $sizes_attr;

    $GetHTMLAttrsChecker->({
      charset => sub {
        my ($self, $attr) = @_;
        ## |@_| is passed on unchanged after the attribute value.
        $HTMLCharsetChecker->($attr->value, @_);
      },
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      sizes => sub {
        my ($self, $attr) = @_;
        $sizes_attr = $attr;

        ## Each space-separated token must be unique and either |any|
        ## or |WIDTHxHEIGHT| with no leading zero in either number.
        my %seen;
        for my $token (grep {length $_}
                       split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
          if ($seen{$token}++) {
            $self->{onerror}->(node => $attr, type => 'duplicate token',
                               value => $token,
                               level => $self->{level}->{must});
            next;
          }
          next if $token eq 'any' or
                  $token =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/;
          $self->{onerror}->(node => $attr,
                             type => 'sizes:syntax error',
                             value => $token,
                             level => $self->{level}->{must});
        }
      },
      target => $HTMLTargetAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
          ## NOTE: |charset| attribute had been part of HTML5 spec though
          ## it had been commented out.
      href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      methods => FEATURE_HTML20_RFC,
      rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      sizes => FEATURE_HTML5_LC,
      target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      # title: HTML5_WD | HTML5_LC | ...
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    })->($self, $item, $element_state);

    ## Reports a missing required attribute on the element.
    my $missing = sub {
      my $name = shift;
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => $name,
                         level => $self->{level}->{must});
    };

    if ($link_el->has_attribute_ns (undef, 'href')) {
      ## Record that a hyperlink has been seen, for the |base target|
      ## ordering check.
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    } else {
      $missing->('href');
    }

    $missing->('rel') unless $link_el->has_attribute_ns (undef, 'rel');

    if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
      $self->{onerror}->(node => $sizes_attr,
                         type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }

    if ($element_state->{link_rel}->{alternate} and
        $element_state->{link_rel}->{stylesheet}) {
      my $title_attr = $link_el->get_attribute_node_ns (undef, 'title');
      if (not $title_attr) {
        $missing->('title');
      } elsif ($title_attr->value eq '') {
        $self->{onerror}->(node => $title_attr,
                           type => 'empty style sheet title',
                           level => $self->{level}->{must});
      }
    }
  },
};
2120    
2121     $Element->{$HTML_NS}->{meta} = {
2122 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2123 wakaba 1.40 %HTMLEmptyChecker,
2124     check_attrs => sub {
2125     my ($self, $item, $element_state) = @_;
2126 wakaba 1.1 my $name_attr;
2127     my $http_equiv_attr;
2128     my $charset_attr;
2129     my $content_attr;
2130 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2131 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2132     $attr_ns = '' unless defined $attr_ns;
2133     my $attr_ln = $attr->manakai_local_name;
2134     my $checker;
2135 wakaba 1.73 my $status;
2136 wakaba 1.1 if ($attr_ns eq '') {
2137 wakaba 1.73 $status = {
2138     %HTMLAttrStatus,
2139 wakaba 1.82 %XHTML2CommonAttrStatus,
2140 wakaba 1.153 charset => FEATURE_HTML5_WD,
2141     content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2142 wakaba 1.187 dir => FEATURE_HTML5_REC,
2143 wakaba 1.153 'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2144 wakaba 1.187 id => FEATURE_HTML5_REC,
2145     lang => FEATURE_HTML5_REC,
2146 wakaba 1.153 name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2147 wakaba 1.73 scheme => FEATURE_M12N10_REC,
2148     }->{$attr_ln};
2149    
2150 wakaba 1.1 if ($attr_ln eq 'content') {
2151     $content_attr = $attr;
2152     $checker = 1;
2153     } elsif ($attr_ln eq 'name') {
2154     $name_attr = $attr;
2155     $checker = 1;
2156     } elsif ($attr_ln eq 'http-equiv') {
2157     $http_equiv_attr = $attr;
2158     $checker = 1;
2159     } elsif ($attr_ln eq 'charset') {
2160     $charset_attr = $attr;
2161     $checker = 1;
2162 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
2163 wakaba 1.71 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2164 wakaba 1.67 $checker = sub {};
2165 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2166     $attr_ln !~ /[A-Z]/) {
2167 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
2168     $status = $HTMLDatasetAttrStatus;
2169 wakaba 1.1 } else {
2170     $checker = $HTMLAttrChecker->{$attr_ln}
2171 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
2172 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
2173     }
2174     } else {
2175     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2176 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
2177     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2178     || $AttrStatus->{$attr_ns}->{''};
2179     $status = FEATURE_ALLOWED if not defined $status;
2180 wakaba 1.1 }
2181 wakaba 1.62
2182 wakaba 1.1 if ($checker) {
2183 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
2184 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
2185 wakaba 1.54 #
2186 wakaba 1.1 } else {
2187 wakaba 1.104 $self->{onerror}->(node => $attr,
2188     type => 'unknown attribute',
2189     level => $self->{level}->{uncertain});
2190 wakaba 1.49 ## ISSUE: No conformance createria for unknown attributes in the spec
2191     }
2192    
2193 wakaba 1.82 $self->_attr_status_info ($attr, $status);
2194 wakaba 1.1 }
2195    
2196     if (defined $name_attr) {
2197     if (defined $http_equiv_attr) {
2198     $self->{onerror}->(node => $http_equiv_attr,
2199 wakaba 1.104 type => 'attribute not allowed',
2200     level => $self->{level}->{must});
2201 wakaba 1.1 } elsif (defined $charset_attr) {
2202     $self->{onerror}->(node => $charset_attr,
2203 wakaba 1.104 type => 'attribute not allowed',
2204     level => $self->{level}->{must});
2205 wakaba 1.1 }
2206     my $metadata_name = $name_attr->value;
2207     my $metadata_value;
2208     if (defined $content_attr) {
2209     $metadata_value = $content_attr->value;
2210     } else {
2211 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2212 wakaba 1.104 type => 'attribute missing',
2213     text => 'content',
2214     level => $self->{level}->{must});
2215 wakaba 1.1 $metadata_value = '';
2216     }
2217     } elsif (defined $http_equiv_attr) {
2218     if (defined $charset_attr) {
2219     $self->{onerror}->(node => $charset_attr,
2220 wakaba 1.104 type => 'attribute not allowed',
2221     level => $self->{level}->{must});
2222 wakaba 1.1 }
2223     unless (defined $content_attr) {
2224 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2225 wakaba 1.104 type => 'attribute missing',
2226     text => 'content',
2227     level => $self->{level}->{must});
2228 wakaba 1.1 }
2229     } elsif (defined $charset_attr) {
2230     if (defined $content_attr) {
2231     $self->{onerror}->(node => $content_attr,
2232 wakaba 1.104 type => 'attribute not allowed',
2233     level => $self->{level}->{must});
2234 wakaba 1.1 }
2235     } else {
2236     if (defined $content_attr) {
2237     $self->{onerror}->(node => $content_attr,
2238 wakaba 1.104 type => 'attribute not allowed',
2239     level => $self->{level}->{must});
2240 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2241 wakaba 1.104 type => 'attribute missing:name|http-equiv',
2242     level => $self->{level}->{must});
2243 wakaba 1.1 } else {
2244 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2245 wakaba 1.104 type => 'attribute missing:name|http-equiv|charset',
2246     level => $self->{level}->{must});
2247 wakaba 1.1 }
2248     }
2249    
2250 wakaba 1.32 my $check_charset_decl = sub () {
2251 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
2252 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2253     for my $el (@{$parent->child_nodes}) {
2254     next unless $el->node_type == 1; # ELEMENT_NODE
2255 wakaba 1.40 unless ($el eq $item->{node}) {
2256 wakaba 1.29 ## NOTE: Not the first child element.
2257 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2258 wakaba 1.32 type => 'element not allowed:meta charset',
2259 wakaba 1.104 level => $self->{level}->{must});
2260 wakaba 1.29 }
2261     last;
2262     ## NOTE: Entity references are not supported.
2263     }
2264     } else {
2265 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2266 wakaba 1.32 type => 'element not allowed:meta charset',
2267 wakaba 1.104 level => $self->{level}->{must});
2268 wakaba 1.29 }
2269 wakaba 1.32 }; # $check_charset_decl
2270 wakaba 1.21
2271 wakaba 1.32 my $check_charset = sub ($$) {
2272     my ($attr, $charset_value) = @_;
2273 wakaba 1.21
2274 wakaba 1.91 my $charset;
2275     ($charset, $charset_value)
2276     = $HTMLCharsetChecker->($charset_value, $self, $attr);
2277    
2278 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
2279 wakaba 1.21 if (defined $ic) {
2280     ## TODO: Test for this case
2281     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2282     if ($charset ne $ic_charset) {
2283 wakaba 1.32 $self->{onerror}->(node => $attr,
2284 wakaba 1.104 type => 'mismatched charset name',
2285 wakaba 1.106 text => $ic,
2286 wakaba 1.104 value => $charset_value,
2287     level => $self->{level}->{must});
2288 wakaba 1.21 }
2289     } else {
2290     ## NOTE: MUST, but not checkable, since the document is not originally
2291     ## in serialized form (or the parser does not preserve the input
2292     ## encoding information).
2293 wakaba 1.32 $self->{onerror}->(node => $attr,
2294 wakaba 1.104 type => 'mismatched charset name not checked',
2295     value => $charset_value,
2296     level => $self->{level}->{uncertain});
2297 wakaba 1.21 }
2298    
2299 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
2300     $self->{onerror}->(node => $attr,
2301 wakaba 1.104 type => 'charref in charset',
2302     level => $self->{level}->{must},
2303     layer => 'syntax');
2304 wakaba 1.22 }
2305 wakaba 1.32 }; # $check_charset
2306    
2307     ## TODO: metadata conformance
2308    
2309     ## TODO: pragma conformance
2310     if (defined $http_equiv_attr) { ## An enumerated attribute
2311     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
2312 wakaba 1.33
2313 wakaba 1.85 if ($self->{has_http_equiv}->{$keyword}) {
2314     $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2315     node => $http_equiv_attr,
2316 wakaba 1.104 level => $self->{level}->{must});
2317 wakaba 1.85 } else {
2318     $self->{has_http_equiv}->{$keyword} = 1;
2319     }
2320    
2321     if ($keyword eq 'content-type') {
2322 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2323 wakaba 1.33
2324 wakaba 1.32 $check_charset_decl->();
2325 wakaba 1.182
2326     unless ($item->{node}->owner_document->manakai_is_html) {
2327     $self->{onerror}->(node => $item->{node},
2328     type => 'in XML:charset',
2329     level => $self->{level}->{must});
2330     }
2331    
2332 wakaba 1.32 if ($content_attr) {
2333     my $content = $content_attr->value;
2334 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2335 wakaba 1.132 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2336 wakaba 1.58 =(.+)\z!sx) {
2337 wakaba 1.32 $check_charset->($content_attr, $1);
2338     } else {
2339     $self->{onerror}->(node => $content_attr,
2340     type => 'meta content-type syntax error',
2341 wakaba 1.104 level => $self->{level}->{must});
2342 wakaba 1.85 }
2343     }
2344     } elsif ($keyword eq 'default-style') {
2345     ## ISSUE: Not defined yet in the spec.
2346     } elsif ($keyword eq 'refresh') {
2347     if ($content_attr) {
2348     my $content = $content_attr->value;
2349     if ($content =~ /\A[0-9]+\z/) {
2350     ## NOTE: Valid non-negative integer.
2351     #
2352 wakaba 1.132 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2353 wakaba 1.85 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
2354     Whatpm::URIChecker->check_iri_reference ($content, sub {
2355 wakaba 1.104 $self->{onerror}->(value => $content, @_, node => $content_attr);
2356 wakaba 1.106 }, $self->{level});
2357 wakaba 1.85 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URIs".
2358    
2359     $element_state->{uri_info}->{content}->{node} = $content_attr;
2360     $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2361     ## TODO: absolute
2362     push @{$self->{return}->{uri}->{$content} ||= []},
2363     $element_state->{uri_info}->{content};
2364     } else {
2365     $self->{onerror}->(node => $content_attr,
2366     type => 'refresh:syntax error',
2367 wakaba 1.104 level => $self->{level}->{must});
2368 wakaba 1.32 }
2369     }
2370     } else {
2371     $self->{onerror}->(node => $http_equiv_attr,
2372 wakaba 1.104 type => 'enumerated:invalid',
2373     level => $self->{level}->{must});
2374 wakaba 1.32 }
2375     }
2376    
2377     if (defined $charset_attr) {
2378 wakaba 1.182 my $value = $charset_attr->value;
2379    
2380 wakaba 1.32 $check_charset_decl->();
2381 wakaba 1.182 $check_charset->($charset_attr, $value);
2382    
2383     if (not $item->{node}->owner_document->manakai_is_html and
2384     not $value =~ /\A[Uu][Tt][Ff]-8\z/) {
2385     $self->{onerror}->(node => $item->{node},
2386     type => 'in XML:charset',
2387     level => $self->{level}->{must});
2388     }
2389 wakaba 1.1 }
2390     },
2391     };
2392    
2393     $Element->{$HTML_NS}->{style} = {
         ## Checker for the HTML |style| element.  The normalized |type|
         ## attribute decides whether child elements are tolerated and how
         ## the collected character data is handed to |onsubdoc| at the end.
2394 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2395 wakaba 1.40 %HTMLChecker,
2396     check_attrs => $GetHTMLAttrsChecker->({
2397 wakaba 1.1 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
2398     media => $HTMLMQAttrChecker,
2399     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
2400     ## NOTE: |title| has special semantics for |style|s, but is syntactically
2401     ## not different
2402 wakaba 1.49 }, {
2403     %HTMLAttrStatus,
2404 wakaba 1.82 %XHTML2CommonAttrStatus,
2405 wakaba 1.187 dir => FEATURE_HTML5_REC,
2406 wakaba 1.82 disabled => FEATURE_XHTML2_ED,
2407 wakaba 1.154 href => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
2408 wakaba 1.187 id => FEATURE_HTML5_REC,
2409     lang => FEATURE_HTML5_REC,
2410 wakaba 1.153 media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2411     scoped => FEATURE_HTML5_FD,
2412 wakaba 1.187 title => FEATURE_HTML5_REC,
2413 wakaba 1.153 type => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2414 wakaba 1.1 }),
         ## check_start: reads |type| (defaulting to |text/css| when the
         ## attribute is absent), normalizes it to a lowercase "type/subtype"
         ## string, and records in |allow_element| whether element children
         ## are accepted.  Also starts an empty text buffer.
2415 wakaba 1.40 check_start => sub {
2416     my ($self, $item, $element_state) = @_;
2417    
2418 wakaba 1.27 ## NOTE: |html:style| itself has no conformance criteria on content model.
2419 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
2420 wakaba 1.93 $type = 'text/css' unless defined $type;
         ## The pattern accepts only "type/subtype" with optional surrounding
         ## whitespace; anything else (e.g. a value with parameters) does not
         ## match and leaves |$type| undefined below.
2421     if ($type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
2422     $type = "$1/$2";
2423     $type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
2424     } else {
2425     ## NOTE: We don't know how parameters are handled by UAs. According to
2426     ## HTML5 specification, <style> with unknown parameters in |type=""|
2427     ## must be ignored.
2428     undef $type;
2429     }
         ## Only |text/css| forbids element children; an invalid or unknown
         ## type allows them.
2430     if (not defined $type) {
2431     $element_state->{allow_element} = 1; # invalid type=""
2432     } elsif ($type eq 'text/css') {
2433 wakaba 1.40 $element_state->{allow_element} = 0;
2434 wakaba 1.93 #} elsif ($type =~ m![/+][Xx][Mm][Ll]\z!) {
2435     # ## NOTE: There is no definition for "XML-based styling language" in HTML5
2436     # $element_state->{allow_element} = 1;
2437 wakaba 1.40 } else {
2438     $element_state->{allow_element} = 1; # unknown
2439     }
2440 wakaba 1.93 $element_state->{style_type} = $type;
2441 wakaba 1.79
2442     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2443     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2444 wakaba 1.107
2445     $element_state->{text} = '';
2446 wakaba 1.40 },
         ## check_child_element: reports excluded ("minus") interactive
         ## elements, accepts "plus" elements, and otherwise accepts a child
         ## only when |allow_element| was set by check_start.
2447     check_child_element => sub {
2448     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2449     $child_is_transparent, $element_state) = @_;
2450 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2451     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2452 wakaba 1.40 $self->{onerror}->(node => $child_el,
2453     type => 'element not allowed:minus',
2454 wakaba 1.104 level => $self->{level}->{must});
2455 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2456     #
2457     } elsif ($element_state->{allow_element}) {
2458     #
2459     } else {
2460 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2461     level => $self->{level}->{must});
2462 wakaba 1.40 }
2463     },
         ## check_child_text: appends the text node's data to the buffer that
         ## check_end passes to |onsubdoc|.
2464     check_child_text => sub {
2465     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2466 wakaba 1.115 $element_state->{text} .= $child_node->data;
2467 wakaba 1.40 },
         ## check_end: dispatches on the normalized type recorded by
         ## check_start, then runs the generic |%HTMLChecker| check_end.
2468     check_end => sub {
2469     my ($self, $item, $element_state) = @_;
2470 wakaba 1.93 if (not defined $element_state->{style_type}) {
2471     ## NOTE: Invalid type=""
2472     #
2473     } elsif ($element_state->{style_type} eq 'text/css') {
2474 wakaba 1.40 $self->{onsubdoc}->({s => $element_state->{text},
2475     container_node => $item->{node},
2476 wakaba 1.28 media_type => 'text/css', is_char_string => 1});
2477 wakaba 1.93 } elsif ($element_state->{style_type} =~ m![+/][Xx][Mm][Ll]\z!) {
2478     ## NOTE: XML content should be checked by THIS instance of checker
2479     ## as part of normal tree validation. However, we don't know of any
2480     ## XML-based styling language that can be used in HTML <style> element,
2481     ## such that we throw a "style language not supported" error.
2482 wakaba 1.104 $self->{onerror}->(node => $item->{node},
2483     type => 'XML style lang',
2484     text => $element_state->{style_type},
2485     level => $self->{level}->{uncertain});
2486 wakaba 1.93 } else {
2487     ## NOTE: Should we raise some kind of error for,
2488     ## say, <style type="text/plaion">?
2489     $self->{onsubdoc}->({s => $element_state->{text},
2490     container_node => $item->{node},
2491     media_type => $element_state->{style_type},
2492     is_char_string => 1});
2493 wakaba 1.27 }
2494 wakaba 1.40
2495     $HTMLChecker{check_end}->(@_);
2496 wakaba 1.1 },
2497     };
2498 wakaba 1.25 ## ISSUE: Relationship to significant content check?
2499 wakaba 1.1
2500     $Element->{$HTML_NS}->{body} = {
         ## Checker for the HTML |body| element: flow content plus the
         ## presentational color/URI attributes and |onpopstate|.
2501 wakaba 1.72 %HTMLFlowContentChecker,
2502 wakaba 1.187 status => FEATURE_HTML5_REC,
2503 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2504     alink => $HTMLColorAttrChecker,
2505     background => $HTMLURIAttrChecker,
2506     bgcolor => $HTMLColorAttrChecker,
2507     link => $HTMLColorAttrChecker,
2508 wakaba 1.186 onpopstate => $HTMLEventHandlerAttrChecker,
2509 wakaba 1.68 text => $HTMLColorAttrChecker,
2510     vlink => $HTMLColorAttrChecker,
2511     }, {
2512 wakaba 1.49 %HTMLAttrStatus,
2513 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2514 wakaba 1.49 alink => FEATURE_M12N10_REC_DEPRECATED,
2515     background => FEATURE_M12N10_REC_DEPRECATED,
2516     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
2517 wakaba 1.187 lang => FEATURE_HTML5_REC,
2518 wakaba 1.49 link => FEATURE_M12N10_REC_DEPRECATED,
2519 wakaba 1.50 onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2520 wakaba 1.186 onpopstate => FEATURE_HTML5_LC,
2521 wakaba 1.50 onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2522 wakaba 1.49 text => FEATURE_M12N10_REC_DEPRECATED,
2523     vlink => FEATURE_M12N10_REC_DEPRECATED,
2524     }),
         ## check_start: classifies the URI-bearing attributes; |background|
         ## is marked as an embedded reference, |template| and |ref| as
         ## resource references.
2525 wakaba 1.68 check_start => sub {
2526     my ($self, $item, $element_state) = @_;
2527    
2528     $element_state->{uri_info}->{background}->{type}->{embedded} = 1;
2529 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2530     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2531 wakaba 1.68 },
2532 wakaba 1.1 };
2533    
2534     $Element->{$HTML_NS}->{section} = {
         ## Checker for the HTML |section| element: flow content, no
         ## element-specific attribute checkers, only attribute status tables.
2535 wakaba 1.72 %HTMLFlowContentChecker,
2536 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
2537 wakaba 1.82 check_attrs => $GetHTMLAttrsChecker->({
2538     }, {
2539     %HTMLAttrStatus,
2540     %XHTML2CommonAttrStatus,
2541     }),
2542 wakaba 1.1 };
2543    
2544     $Element->{$HTML_NS}->{nav} = {
         ## Checker for the HTML |nav| element: |status| plus the entries of
         ## |%HTMLFlowContentChecker|.
         ## NOTE(review): |status| precedes the spread here, so a |status|
         ## key in |%HTMLFlowContentChecker|, if one exists, would win --
         ## confirm against that hash's definition.
2545 wakaba 1.153 status => FEATURE_HTML5_LC,
2546 wakaba 1.72 %HTMLFlowContentChecker,
2547 wakaba 1.1 };
2548    
2549     $Element->{$HTML_NS}->{article} = {
         ## Checker for the HTML |article| element: flow content with a
         ## |pubdate| attribute checked by the date-time checker built for
         ## 'global_date_and_time_string'.
2550 wakaba 1.174 %HTMLFlowContentChecker,
2551 wakaba 1.153 status => FEATURE_HTML5_LC,
2552 wakaba 1.174 check_attrs => $GetHTMLAttrsChecker->({
2553     pubdate => $GetDateTimeAttrChecker->('global_date_and_time_string'),
2554     }, {
2555     %HTMLAttrStatus,
2556     # XXX cite
2557     pubdate => FEATURE_HTML5_LC,
2558     }),
2559     }; # article
2560 wakaba 1.1
2561     $Element->{$HTML_NS}->{blockquote} = {
         ## Checker for the HTML |blockquote| element: flow content with a
         ## URI-valued |cite| attribute.
2562 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2563 wakaba 1.72 %HTMLFlowContentChecker,
2564 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2565 wakaba 1.1 cite => $HTMLURIAttrChecker,
2566 wakaba 1.49 }, {
2567     %HTMLAttrStatus,
2568 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2569 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2570 wakaba 1.154 cite => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2571 wakaba 1.187 lang => FEATURE_HTML5_REC,
2572 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2573 wakaba 1.1 }),
         ## check_start: classifies URI-bearing attributes; |cite| is marked
         ## as a citation reference, |template| and |ref| as resources.
2574 wakaba 1.66 check_start => sub {
2575     my ($self, $item, $element_state) = @_;
2576    
2577     $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
2578 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2579     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2580 wakaba 1.66 },
2581 wakaba 1.1 };
2582    
2583     $Element->{$HTML_NS}->{aside} = {
         ## Checker for the HTML |aside| element: |status| plus the entries
         ## of |%HTMLFlowContentChecker|; nothing element-specific.
2584 wakaba 1.153 status => FEATURE_HTML5_LC,
2585 wakaba 1.72 %HTMLFlowContentChecker,
2586 wakaba 1.1 };
2587    
2588     $Element->{$HTML_NS}->{h1} = {
         ## Checker for the HTML |h1| element: phrasing content with an
         ## enumerated |align| attribute.  This hash is also copied for
         ## |h2|..|h6| further down.
2589 wakaba 1.40 %HTMLPhrasingContentChecker,
2590 wakaba 1.187 status => FEATURE_HTML5_REC,
2591 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2592     align => $GetHTMLEnumeratedAttrChecker->({
2593     left => 1, center => 1, right => 1, justify => 1,
2594     }),
2595     }, {
2596 wakaba 1.49 %HTMLAttrStatus,
2597 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2598 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
2599 wakaba 1.187 lang => FEATURE_HTML5_REC,
2600 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2601 wakaba 1.49 }),
         ## check_start: records that a heading has been seen (the |header|
         ## checker reads this flag in its check_end) and classifies the
         ## |template|/|ref| URI attributes as resources.
2602 wakaba 1.40 check_start => sub {
2603     my ($self, $item, $element_state) = @_;
2604     $self->{flag}->{has_hn} = 1;
2605 wakaba 1.79
2606     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2607     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2608 wakaba 1.1 },
2609     };
2610    
         ## |h2|..|h6| each get a shallow copy of the |h1| definition hash, so
         ## they share the same checker code references and status values.
2611 wakaba 1.40 $Element->{$HTML_NS}->{h2} = {%{$Element->{$HTML_NS}->{h1}}};
2612 wakaba 1.1
2613 wakaba 1.40 $Element->{$HTML_NS}->{h3} = {%{$Element->{$HTML_NS}->{h1}}};
2614 wakaba 1.1
2615 wakaba 1.40 $Element->{$HTML_NS}->{h4} = {%{$Element->{$HTML_NS}->{h1}}};
2616 wakaba 1.1
2617 wakaba 1.40 $Element->{$HTML_NS}->{h5} = {%{$Element->{$HTML_NS}->{h1}}};
2618 wakaba 1.1
2619 wakaba 1.40 $Element->{$HTML_NS}->{h6} = {%{$Element->{$HTML_NS}->{h1}}};
2620 wakaba 1.1
2621 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
2622 wakaba 1.174
2623     # XXX footer in header is disallowed (HTML5 revision 3050)
2624 wakaba 1.29
2625 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
         ## Checker for the HTML |header| element: flow content that must
         ## contain at least one heading (tracked through the |has_hn| flag).
2626 wakaba 1.153 status => FEATURE_HTML5_LC,
2627 wakaba 1.72 %HTMLFlowContentChecker,
         ## check_start: registers |header|, |footer| and the sectioning
         ## content set through |_add_minus_elements| (presumably excluding
         ## them as descendants -- confirm in that method), then saves the
         ## current |has_hn| flag and resets it for this element.
2628 wakaba 1.40 check_start => sub {
2629     my ($self, $item, $element_state) = @_;
2630     $self->_add_minus_elements ($element_state,
2631     {$HTML_NS => {qw/header 1 footer 1/}},
2632 wakaba 1.58 $HTMLSectioningContent);
2633 wakaba 1.40 $element_state->{has_hn_original} = $self->{flag}->{has_hn};
2634     $self->{flag}->{has_hn} = 0;
2635 wakaba 1.79
2636     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2637     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2638 wakaba 1.40 },
         ## check_end: undoes the exclusions, reports 'element missing:hn'
         ## when no heading set the flag, merges the saved flag value back
         ## with |||=|, then runs the flow-content check_end.
2639     check_end => sub {
2640     my ($self, $item, $element_state) = @_;
2641     $self->_remove_minus_elements ($element_state);
2642     unless ($self->{flag}->{has_hn}) {
2643     $self->{onerror}->(node => $item->{node},
2644 wakaba 1.104 type => 'element missing:hn',
2645     level => $self->{level}->{must});
2646 wakaba 1.40 }
2647     $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};
2648 wakaba 1.1
2649 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
2650 wakaba 1.1 },
2651 wakaba 1.40 ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
2652 wakaba 1.1 };
2653    
2654     $Element->{$HTML_NS}->{footer} = {
         ## Checker for the HTML |footer| element: flow content with
         ## |header|, |footer|, sectioning and heading content registered
         ## through |_add_minus_elements| for the duration of the element.
2655 wakaba 1.153 status => FEATURE_HTML5_LC,
2656 wakaba 1.72 %HTMLFlowContentChecker,
2657 wakaba 1.40 check_start => sub {
2658     my ($self, $item, $element_state) = @_;
2659     $self->_add_minus_elements ($element_state,
2660 wakaba 1.177 {$HTML_NS => {header => 1, footer => 1}},
2661 wakaba 1.58 $HTMLSectioningContent,
2662 wakaba 1.57 $HTMLHeadingContent);
2663 wakaba 1.79
2664     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2665     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2666 wakaba 1.40 },
         ## check_end: removes the registrations made in check_start before
         ## delegating to the flow-content check_end.
2667     check_end => sub {
2668     my ($self, $item, $element_state) = @_;
2669     $self->_remove_minus_elements ($element_state);
2670 wakaba 1.1
2671 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
2672 wakaba 1.1 },
2673     };
2674    
2675     $Element->{$HTML_NS}->{address} = {
         ## Checker for the HTML |address| element: flow content with
         ## |header|, |footer|, |address|, sectioning and heading content
         ## registered through |_add_minus_elements| while the element is open.
2676 wakaba 1.72 %HTMLFlowContentChecker,
2677 wakaba 1.187 status => FEATURE_HTML5_REC,
2678 wakaba 1.110 check_attrs => $GetHTMLAttrsChecker->({
2679     ## TODO: add test
2680     #align => $GetHTMLEnumeratedAttrChecker->({
2681     # left => 1, center => 1, right => 1, justify => 1,
2682     #}),
2683     }, {
2684 wakaba 1.49 %HTMLAttrStatus,
2685 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2686 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2687 wakaba 1.187 lang => FEATURE_HTML5_REC,
2688 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2689     sdapref => FEATURE_HTML20_RFC,
2690 wakaba 1.49 }),
2691 wakaba 1.40 check_start => sub {
2692     my ($self, $item, $element_state) = @_;
2693 wakaba 1.177 $self->_add_minus_elements
2694     ($element_state,
2695     {$HTML_NS => {header => 1, footer => 1, address => 1}},
2696     $HTMLSectioningContent, $HTMLHeadingContent);
2697 wakaba 1.79
2698     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2699     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2700 wakaba 1.40 },
         ## check_end: removes the registrations made in check_start before
         ## delegating to the flow-content check_end.
2701     check_end => sub {
2702     my ($self, $item, $element_state) = @_;
2703     $self->_remove_minus_elements ($element_state);
2704 wakaba 1.29
2705 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
2706 wakaba 1.29 },
2707 wakaba 1.1 };
2708    
2709     $Element->{$HTML_NS}->{p} = {
         ## Checker for the HTML |p| element: phrasing content with an
         ## enumerated |align| attribute (left/center/right/justify).
2710 wakaba 1.40 %HTMLPhrasingContentChecker,
2711 wakaba 1.187 status => FEATURE_HTML5_REC,
2712 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2713     align => $GetHTMLEnumeratedAttrChecker->({
2714     left => 1, center => 1, right => 1, justify => 1,
2715     }),
2716     }, {
2717 wakaba 1.49 %HTMLAttrStatus,
2718 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2719 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
2720 wakaba 1.187 lang => FEATURE_HTML5_REC,
2721 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2722 wakaba 1.49 }),
2723 wakaba 1.1 };
2724    
2725     $Element->{$HTML_NS}->{hr} = {
         ## Checker for the HTML |hr| element, built on |%HTMLEmptyChecker|.
         ## No element-specific attribute checkers are registered yet (see
         ## the TODO); only attribute status values are declared.
2726 wakaba 1.40 %HTMLEmptyChecker,
2727 wakaba 1.187 status => FEATURE_HTML5_REC,
2728 wakaba 1.70 check_attrs => $GetHTMLAttrsChecker->({
2729     ## TODO: HTML4 |align|, |noshade|, |size|, |width|
2730     }, {
2731 wakaba 1.49 %HTMLAttrStatus,
2732     %HTMLM12NCommonAttrStatus,
2733     align => FEATURE_M12N10_REC_DEPRECATED,
2734 wakaba 1.187 lang => FEATURE_HTML5_REC,
2735 wakaba 1.49 noshade => FEATURE_M12N10_REC_DEPRECATED,
2736 wakaba 1.61 sdapref => FEATURE_HTML20_RFC,
2737 wakaba 1.49 size => FEATURE_M12N10_REC_DEPRECATED,
2738     width => FEATURE_M12N10_REC_DEPRECATED,
2739     }),
2740 wakaba 1.1 };
2741    
2742     $Element->{$HTML_NS}->{br} = {
         ## Checker for the HTML |br| element, built on |%HTMLEmptyChecker|,
         ## with an enumerated |clear| attribute (left/all/right/none).
2743 wakaba 1.40 %HTMLEmptyChecker,
2744 wakaba 1.187 status => FEATURE_HTML5_REC,
2745 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2746     clear => $GetHTMLEnumeratedAttrChecker->({
2747     left => 1, all => 1, right => 1, none => 1,
2748     }),
2749     }, {
2750 wakaba 1.49 %HTMLAttrStatus,
2751 wakaba 1.187 class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
2752 wakaba 1.49 clear => FEATURE_M12N10_REC_DEPRECATED,
2753 wakaba 1.187 id => FEATURE_HTML5_REC,
2754 wakaba 1.61 sdapref => FEATURE_HTML20_RFC,
2755 wakaba 1.187 style => FEATURE_HTML5_REC,
2756     title => FEATURE_HTML5_REC,
2757 wakaba 1.49 }),
2758 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
2759     ## (This requirement is semantic so that we cannot check.)
2760 wakaba 1.1 };
2761    
2762     $Element->{$HTML_NS}->{dialog} = {
         ## Checker for the HTML |dialog| element.  Children must alternate
         ## strictly |dt|, |dd|, |dt|, |dd|, ...; the expected next child is
         ## tracked in |$element_state->{phase}| ('before dt' / 'before dd').
2763 wakaba 1.153 status => FEATURE_HTML5_WD,
2764 wakaba 1.40 %HTMLChecker,
2765     check_start => sub {
2766     my ($self, $item, $element_state) = @_;
2767     $element_state->{phase} = 'before dt';
2768 wakaba 1.79
2769     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2770     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2771 wakaba 1.40 },
         ## check_child_element: after the minus/plus element handling, an
         ## out-of-order |dd| or |dt| is reported as 'ps element missing'
         ## (naming the element that should have come first) and the phase
         ## is set as if the missing element had been present; any other
         ## element is 'element not allowed'.
2772     check_child_element => sub {
2773     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2774     $child_is_transparent, $element_state) = @_;
2775 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2776     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2777 wakaba 1.40 $self->{onerror}->(node => $child_el,
2778     type => 'element not allowed:minus',
2779 wakaba 1.104 level => $self->{level}->{must});
2780 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2781     #
2782     } elsif ($element_state->{phase} eq 'before dt') {
2783     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
2784     $element_state->{phase} = 'before dd';
2785     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
2786     $self->{onerror}
2787 wakaba 1.104 ->(node => $child_el, type => 'ps element missing',
2788     text => 'dt',
2789     level => $self->{level}->{must});
2790 wakaba 1.40 $element_state->{phase} = 'before dt';
2791     } else {
2792 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2793     level => $self->{level}->{must});
2794 wakaba 1.40 }
2795     } elsif ($element_state->{phase} eq 'before dd') {
2796     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
2797     $element_state->{phase} = 'before dt';
2798     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
2799     $self->{onerror}
2800 wakaba 1.104 ->(node => $child_el, type => 'ps element missing',
2801     text => 'dd',
2802     level => $self->{level}->{must});
2803 wakaba 1.40 $element_state->{phase} = 'before dd';
2804     } else {
2805 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2806     level => $self->{level}->{must});
2807 wakaba 1.1 }
2808 wakaba 1.40 } else {
2809     die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
2810     }
2811     },
         ## check_child_text: text flagged as significant by the caller is
         ## not allowed directly inside |dialog|.
2812     check_child_text => sub {
2813     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2814     if ($has_significant) {
2815 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
2816     level => $self->{level}->{must});
2817 wakaba 1.1 }
2818 wakaba 1.40 },
         ## check_end: a trailing |dt| without its |dd| leaves the phase at
         ## 'before dd', which is reported as a missing child element.
2819     check_end => sub {
2820     my ($self, $item, $element_state) = @_;
2821     if ($element_state->{phase} eq 'before dd') {
2822     $self->{onerror}->(node => $item->{node},
2823 wakaba 1.104 type => 'child element missing',
2824     text => 'dd',
2825     level => $self->{level}->{must});
2826 wakaba 1.1 }
2827 wakaba 1.40
2828     $HTMLChecker{check_end}->(@_);
2829 wakaba 1.1 },
2830     };
2831    
2832     $Element->{$HTML_NS}->{pre} = {
         ## Checker for the HTML |pre| element: phrasing content with a
         ## non-negative integer |width| attribute.  When the element's
         ## |class| attribute contains |idl| or |idl-code|, its text content
         ## is additionally submitted to |onsubdoc| as |text/x-webidl|.
2833 wakaba 1.40 %HTMLPhrasingContentChecker,
2834 wakaba 1.187 status => FEATURE_HTML5_REC,
2835 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2836     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2837     }, {
2838 wakaba 1.49 %HTMLAttrStatus,
2839 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2840 wakaba 1.187 lang => FEATURE_HTML5_REC,
2841 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2842 wakaba 1.49 width => FEATURE_M12N10_REC_DEPRECATED,
2843     }),
         ## check_end: performs the optional WebIDL sub-document hand-off,
         ## then runs the phrasing-content check_end.
2844 wakaba 1.101 check_end => sub {
2845     my ($self, $item, $element_state) = @_;
2846    
2847     ## TODO: Flag to enable/disable IDL checking?
2848 wakaba 1.145 my $class = $item->{node}->get_attribute_ns (undef, 'class');
         ## |class| is undefined when the attribute is absent; test for that
         ## explicitly instead of matching a pattern against undef.
2849 wakaba 1.102 if (defined $class and $class =~ /\bidl(?>-code)?\b/) { ## TODO: use classList.has
2850     ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
2851     ## NOTE: pre.code > code.idl-code: WebIDL spec
2852     ## NOTE: pre.idl-code: DOM1 spec
2853     ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
2854     ## NOTE: pre.schema: ReSpec-generated specs
2855 wakaba 1.101 $self->{onsubdoc}->({s => $item->{node}->text_content,
2856     container_node => $item->{node},
2857     media_type => 'text/x-webidl',
2858     is_char_string => 1});
2859     }
2860    
2861 wakaba 1.110 $HTMLPhrasingContentChecker{check_end}->(@_);
2862 wakaba 1.101 },
2863 wakaba 1.1 };
2864    
2865     $Element->{$HTML_NS}->{ol} = {
         ## Checker for the HTML |ol| element: only |li| children are
         ## accepted and significant text is rejected.  This hash is also the
         ## base that the |ul| definition copies.
2866 wakaba 1.40 %HTMLChecker,
2867 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2868 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2869 wakaba 1.68 compact => $GetHTMLBooleanAttrChecker->('compact'),
2870 wakaba 1.69 reversed => $GetHTMLBooleanAttrChecker->('reversed'),
2871 wakaba 1.1 start => $HTMLIntegerAttrChecker,
2872 wakaba 1.69 ## TODO: HTML4 |type|
2873 wakaba 1.49 }, {
2874     %HTMLAttrStatus,
2875 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2876 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2877 wakaba 1.49 compact => FEATURE_M12N10_REC_DEPRECATED,
2878 wakaba 1.187 lang => FEATURE_HTML5_REC,
2879 wakaba 1.153 reversed => FEATURE_HTML5_WD,
2880 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2881 wakaba 1.153 #start => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
2882     start => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2883 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2884 wakaba 1.1 }),
         ## check_child_element: after the minus/plus element handling, any
         ## child other than an HTML |li| is 'element not allowed'.
2885 wakaba 1.40 check_child_element => sub {
2886     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2887     $child_is_transparent, $element_state) = @_;
2888 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2889     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2890 wakaba 1.40 $self->{onerror}->(node => $child_el,
2891     type => 'element not allowed:minus',
2892 wakaba 1.104 level => $self->{level}->{must});
2893 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2894     #
2895     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
2896     #
2897     } else {
2898 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2899     level => $self->{level}->{must});
2900 wakaba 1.1 }
2901 wakaba 1.40 },
         ## check_child_text: text flagged as significant by the caller is
         ## not allowed directly inside the list.
2902     check_child_text => sub {
2903     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2904     if ($has_significant) {
2905 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
2906     level => $self->{level}->{must});
2907 wakaba 1.1 }
2908     },
2909     };
2910    
2911     $Element->{$HTML_NS}->{ul} = {
         ## Checker for the HTML |ul| element: starts from a copy of the |ol|
         ## definition (so the child element/text checks are shared) and
         ## overrides |status| and |check_attrs|.
2912 wakaba 1.40 %{$Element->{$HTML_NS}->{ol}},
2913 wakaba 1.187 status => FEATURE_HTML5_REC,
2914 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2915     compact => $GetHTMLBooleanAttrChecker->('compact'),
2916 wakaba 1.69 ## TODO: HTML4 |type|
2917     ## TODO: sdaform, align
2918 wakaba 1.68 }, {
2919 wakaba 1.49 %HTMLAttrStatus,
2920 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2921 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2922 wakaba 1.49 compact => FEATURE_M12N10_REC_DEPRECATED,
2923 wakaba 1.187 lang => FEATURE_HTML5_REC,
2924 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2925 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2926     }),
2927 wakaba 1.1 };
2928    
2929 wakaba 1.64 $Element->{$HTML_NS}->{dir} = {
         ## Checker for the deprecated HTML |dir| element: starts from a copy
         ## of the |ul| definition and overrides |status| and |check_attrs|.
2930     ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
2931     %{$Element->{$HTML_NS}->{ul}},
2932     status => FEATURE_M12N10_REC_DEPRECATED,
2933 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2934     compact => $GetHTMLBooleanAttrChecker->('compact'),
2935     }, {
2936 wakaba 1.64 %HTMLAttrStatus,
2937     %HTMLM12NCommonAttrStatus,
2938     align => FEATURE_HTML2X_RFC,
2939     compact => FEATURE_M12N10_REC_DEPRECATED,
2940 wakaba 1.187 lang => FEATURE_HTML5_REC,
2941 wakaba 1.64 sdaform => FEATURE_HTML20_RFC,
2942     sdapref => FEATURE_HTML20_RFC,
2943     }),
2944     };
2945    
2946 wakaba 1.1 $Element->{$HTML_NS}->{li} = {
         ## Checker for the HTML |li| element.  Content is checked as
         ## phrasing content while the |in_menu| flag is set and as flow
         ## content otherwise; |value| is only expected on items of an |ol|.
2947 wakaba 1.72 %HTMLFlowContentChecker,
2948 wakaba 1.187 status => FEATURE_HTML5_REC,
2949 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2950 wakaba 1.69 ## TODO: HTML4 |type|
         ## |value|: reports 'non-ol li value' unless the parent element is
         ## an HTML |ol| (including when there is no parent element), then
         ## always validates the value with the integer attribute checker.
2951 wakaba 1.49 value => sub {
2952 wakaba 1.1 my ($self, $attr) = @_;
2953 wakaba 1.152
2954     my $parent_is_ol;
2955 wakaba 1.1 my $parent = $attr->owner_element->manakai_parent_element;
2956     if (defined $parent) {
2957     my $parent_ns = $parent->namespace_uri;
2958     $parent_ns = '' unless defined $parent_ns;
2959     my $parent_ln = $parent->manakai_local_name;
2960 wakaba 1.152 $parent_is_ol = ($parent_ns eq $HTML_NS and $parent_ln eq 'ol');
2961     }
2962    
2963     unless ($parent_is_ol) {
2964     ## ISSUE: No "MUST" in the spec.
2965     $self->{onerror}->(node => $attr,
2966     type => 'non-ol li value',
2967     level => $self->{level}->{html5_fact});
2968 wakaba 1.1 }
2969 wakaba 1.152
2970 wakaba 1.1 $HTMLIntegerAttrChecker->($self, $attr);
2971 wakaba 1.131 },
2972 wakaba 1.49 }, {
2973     %HTMLAttrStatus,
2974 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2975 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2976 wakaba 1.187 lang => FEATURE_HTML5_REC,
2977 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2978 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2979 wakaba 1.154 #value => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
2980 wakaba 1.55 # FEATURE_M12N10_REC_DEPRECATED,
2981 wakaba 1.154 value => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
2982 wakaba 1.82 FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
2983 wakaba 1.1 }),
         ## check_child_element / check_child_text: forward all arguments
         ## unchanged to the phrasing-content or flow-content checker,
         ## selected by |$self->{flag}->{in_menu}|.
2984 wakaba 1.40 check_child_element => sub {
2985     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2986     $child_is_transparent, $element_state) = @_;
2987     if ($self->{flag}->{in_menu}) {
2988 wakaba 1.152 ## TODO: In <dir> element, then ...
2989 wakaba 1.40 $HTMLPhrasingContentChecker{check_child_element}->(@_);
2990     } else {
2991 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
2992 wakaba 1.40 }
2993     },
2994     check_child_text => sub {
2995     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2996     if ($self->{flag}->{in_menu}) {
2997 wakaba 1.152 ## TODO: In <dir> element, then ...
2998 wakaba 1.40 $HTMLPhrasingContentChecker{check_child_text}->(@_);
2999 wakaba 1.1 } else {
3000 wakaba 1.72 $HTMLFlowContentChecker{check_child_text}->(@_);
3001 wakaba 1.1 }
3002     },
3003     };
3004    
3005     $Element->{$HTML_NS}->{dl} = {
         ## Checker for the HTML |dl| element.  Children are groups of one or
         ## more |dt| followed by one or more |dd|; the position inside a
         ## group is tracked in |$element_state->{phase}| ('before dt',
         ## 'in dts', 'in dds').
3006 wakaba 1.40 %HTMLChecker,
3007 wakaba 1.187 status => FEATURE_HTML5_REC,
3008 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
3009     compact => $GetHTMLBooleanAttrChecker->('compact'),
3010     }, {
3011 wakaba 1.49 %HTMLAttrStatus,
3012 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3013 wakaba 1.49 compact => FEATURE_M12N10_REC_DEPRECATED,
3014 wakaba 1.187 lang => FEATURE_HTML5_REC,
3015 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3016     sdapref => FEATURE_HTML20_RFC,
3017 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
3018     }),
3019 wakaba 1.40 check_start => sub {
3020     my ($self, $item, $element_state) = @_;
3021     $element_state->{phase} = 'before dt';
3022 wakaba 1.79
3023     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3024     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3025 wakaba 1.40 },
         ## check_child_element: after the minus/plus element handling, |dt|
         ## and |dd| move the phase between 'in dts' and 'in dds'.  A |dd|
         ## that appears before any |dt| is reported as 'ps element missing'
         ## and the phase continues as 'in dds'.  Any other element is
         ## 'element not allowed' and leaves the phase unchanged.
3026     check_child_element => sub {
3027     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3028     $child_is_transparent, $element_state) = @_;
3029 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
3030     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
3031 wakaba 1.40 $self->{onerror}->(node => $child_el,
3032     type => 'element not allowed:minus',
3033 wakaba 1.104 level => $self->{level}->{must});
3034 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3035     #
3036     } elsif ($element_state->{phase} eq 'in dds') {
3037     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
3038     #$element_state->{phase} = 'in dds';
3039     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
3040     $element_state->{phase} = 'in dts';
3041     } else {
3042 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
3043     level => $self->{level}->{must});
3044 wakaba 1.40 }
3045     } elsif ($element_state->{phase} eq 'in dts') {
3046     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
3047     #$element_state->{phase} = 'in dts';
3048     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
3049     $element_state->{phase} = 'in dds';
3050     } else {
3051 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
3052     level => $self->{level}->{must});
3053 wakaba 1.40 }
3054     } elsif ($element_state->{phase} eq 'before dt') {
3055     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
3056     $element_state->{phase} = 'in dts';
3057     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
3058     $self->{onerror}
3059 wakaba 1.104 ->(node => $child_el, type => 'ps element missing',
3060     text => 'dt',
3061     level => $self->{level}->{must});
3062 wakaba 1.40 $element_state->{phase} = 'in dds';
3063     } else {
3064 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
3065     level => $self->{level}->{must});
3066 wakaba 1.1 }
3067 wakaba 1.40 } else {
3068     die "check_child_element: Bad |dl| phase: $element_state->{phase}";
3069 wakaba 1.1 }
3070 wakaba 1.40 },
         ## check_child_text: text flagged as significant by the caller is
         ## not allowed directly inside |dl|.
3071     check_child_text => sub {
3072     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3073     if ($has_significant) {
3074 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
3075     level => $self->{level}->{must});
3076 wakaba 1.40 }
3077     },
         ## check_end: ending while still 'in dts' means the last |dt| group
         ## has no |dd|, which is reported as a missing child element.
3078     check_end => sub {
3079     my ($self, $item, $element_state) = @_;
3080     if ($element_state->{phase} eq 'in dts') {
3081     $self->{onerror}->(node => $item->{node},
3082 wakaba 1.104 type => 'child element missing',
3083     text => 'dd',
3084     level => $self->{level}->{must});
3085 wakaba 1.1 }
3086    
3087 wakaba 1.40 $HTMLChecker{check_end}->(@_);
3088 wakaba 1.1 },
3089     };
3090    
## HTML |dt| element.  Starts from the shared %HTMLPhrasingContentChecker
## callbacks, then sets the element status and installs an attribute
## checker that has no element-specific value checkers.
$Element->{$HTML_NS}->{dt} = do {
  ## Attribute name => feature status.  Later entries override earlier
  ## ones, so the element-specific rows stay after the shared tables.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3101    
## HTML |dd| element.  Same shape as |dt|, but built on the shared
## %HTMLFlowContentChecker callbacks.
$Element->{$HTML_NS}->{dd} = do {
  ## Attribute name => feature status; element-specific rows come last
  ## so that they win over the shared tables.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLFlowContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3112    
## HTML |a| element.
##
## check_attrs runs a checker for every attribute of the element,
## remembers the null-namespace attributes that have an |a|-specific
## checker, and reports the hyperlink-only attributes (|target|, |ping|,
## |rel|, |media|, |hreflang|, |type|) when |href| is absent.
## check_start / check_end bracket the element with the
## $HTMLInteractiveContent exclusion and undo the |in_a_href| flag.
$Element->{$HTML_NS}->{a} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Both lookup tables are identical for every attribute of this
    ## element, so they are built once per call rather than once per
    ## attribute inside the loop below.

    ## Attribute name => feature status (null-namespace attributes only).
    my %status_of = (
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_WD,
      rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    );

    ## Attribute name => value checker specific to |a|.
    my %checker_of = (
      charset => sub {
        my ($self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      ## TODO: HTML4 |coords|
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      ## TODO: HTML4 |shape|
      media => $HTMLMQAttrChecker,
      ## TODO: HTML4/XHTML1 |name|
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    );

    ## Null-namespace attributes found on this element that have an
    ## |a|-specific checker, keyed by local name.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $status = $status_of{$attr_ln};
        $checker = $checker_of{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the per-namespace tables, then to the namespace's
      ## wildcard ('') entry.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        ## A true but non-reference entry means "known, nothing to run".
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## No diagnostic is issued here for a null-namespace attribute
        ## that has neither a checker nor a status.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    ## Remember the outer state so that check_end can undo the flag.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      for my $name (qw/target ping rel media hreflang type/) {
        if (defined $attr{$name}) {
          $self->{onerror}->(node => $attr{$name},
                             type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    ## Clear the flag unless it was already set when check_attrs ran.
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLTransparentChecker{check_end}->(@_);
  },
};
3235    
## HTML |q| element.
##
## |status| is listed after the shared checker table, as in the sibling
## element definitions, so that a |status| key inherited from that table
## (should it carry one) cannot silently replace the element's own value.
$Element->{$HTML_NS}->{q} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Attribute name => value checker.
    cite => $HTMLURIAttrChecker,
  }, {
    ## Attribute name => feature status; later rows override earlier ones.
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    cite => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    sdapref => FEATURE_HTML2X_RFC,
    sdasuff => FEATURE_HTML2X_RFC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Set the type flags for the |cite|, |template| and |ref| entries
    ## of this element's |uri_info|.
    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
3257 wakaba 1.75 ## TODO: "Quotation punctuation (such as quotation marks), if any, must be
3258     ## placed inside the <code>q</code> element." Though we cannot test the
3259     element against this requirement since it includes a semantic bit,
3260     ## it might be possible to inform of the existence of quotation marks OUTSIDE
3261     ## the |q| element.
3262 wakaba 1.1
## HTML |cite| element: shared phrasing-content callbacks plus an
## attribute checker with no element-specific value checkers.
$Element->{$HTML_NS}->{cite} = do {
  ## Attribute name => feature status; the last two rows override
  ## whatever the shared tables say for the same names.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3273    
## HTML |em| element: shared phrasing-content callbacks plus an
## attribute checker with no element-specific value checkers.
$Element->{$HTML_NS}->{em} = do {
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3284    
## HTML |strong| element: shared phrasing-content callbacks plus an
## attribute checker with no element-specific value checkers.
$Element->{$HTML_NS}->{strong} = do {
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3295    
## HTML |small| element.  Uses the %HTMLM12NCommonAttrStatus table
## (not the ...XHTML2... variant used by most neighbouring elements).
$Element->{$HTML_NS}->{small} = do {
  ## Attribute name => feature status; |lang| overrides the shared rows.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3305    
## HTML |big| element.  Its status carries only FEATURE_M12N10_REC,
## with no HTML5 flag.
$Element->{$HTML_NS}->{big} = do {
  ## Attribute name => feature status; |lang| overrides the shared rows.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3315    
## HTML |mark| element.  Every callback comes from the shared
## phrasing-content table; only the status is element-specific.
##
## |status| is listed after the shared table, as in the sibling element
## definitions, so that a |status| key inherited from that table (should
## it carry one) cannot silently replace the element's own value.
$Element->{$HTML_NS}->{mark} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
};
3320    
## HTML |dfn| element.
##
## check_start forbids nested |dfn| (via the minus-elements set),
## computes the term being defined, and records the node under that term
## in $self->{term}.  check_end lifts the |dfn| exclusion again.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    ## Term being defined:
    ##   1. the |title| attribute of the |dfn|, if any; otherwise
    ##   2. the |title| of the child HTML |abbr|, provided that |abbr| is
    ##      the only element child and all child text is inter-element
    ##      whitespace; otherwise
    ##   3. the text content of the |dfn|.
    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      ## Count element children so that rule 2 is applied only when the
      ## titled |abbr| is the *only* element child.  Testing whether
      ## $term is already set is not enough: it would accept an |abbr|
      ## that follows some other element.
      my $element_children = 0;
      CHILD: for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          if (++$element_children > 1) {
            undef $term;
            last CHILD;
          }
          next CHILD unless $child->manakai_local_name eq 'abbr';
          my $nsuri = $child->namespace_uri;
          next CHILD unless defined $nsuri and $nsuri eq $HTML_NS;
          my $attr = $child->get_attribute_node_ns (undef, 'title');
          $term = $attr->value if $attr;
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next CHILD
              if $child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/; # Inter-element whitespace
          undef $term;
          last CHILD;
        }
      }
      $term = $node->text_content unless defined $term;
    }

    ## Collect every |dfn| node that defines the same term.
    push @{$self->{term}->{$term} ||= []}, $node;
    ## ISSUE: The HTML5 definition for the defined term does not work with
    ## |ruby| unless |dfn| has |title|.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3381    
## HTML |abbr| element: shared phrasing-content callbacks plus an
## attribute checker with no element-specific value checkers.
$Element->{$HTML_NS}->{abbr} = do {
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    full => FEATURE_XHTML2_ED,
    lang => FEATURE_HTML5_REC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
    ## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
    ## number (plural vs singular) must match the grammatical number of the
    ## contents of the element."  Though this can be checked by machine,
    ## it requires language-specific knowledge and a dictionary, so the
    ## requirement is not checked here.
    ## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
  };
};
3398    
## HTML |acronym| element.  Its status carries only FEATURE_M12N10_REC,
## with no HTML5 flag.
$Element->{$HTML_NS}->{acronym} = do {
  ## Attribute name => feature status; |lang| overrides the shared rows.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3408    
## HTML |time| element.
##
## check_end validates the date/time string: the value of the |datetime|
## attribute when that attribute is present, the element's text content
## otherwise.  Two shapes are accepted by the pattern:
##   YYYY-MM-DD [T] hh:mm[:ss[.fff]] [Z | (+|-)hh:mm]
##   hh:mm[:ss[.fff]]
## Field widths and value ranges are checked afterwards, each failure
## being reported through $self->{onerror} at the |must| level.
##
## |status| is listed after the shared checker table, as in the sibling
## element definitions, so that a |status| key inherited from that table
## (should it carry one) cannot silently replace the element's own value.
$Element->{$HTML_NS}->{time} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # the value itself is validated in |check_end|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_FD,
  }),
  ## TODO: Update definition
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;       # string to validate
    my $reg_sp;      # pattern for the whitespace tolerated around fields
    my $input_node;  # node that diagnostics are attached to
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09\x0A\x0C\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{WhiteSpace}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    ## Every diagnostic below shares the same node and level.
    my $report = sub {
      $self->{onerror}->(node => $input_node, type => $_[0],
                         level => $self->{level}->{must});
    };

    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      ## Name the captures right away.  In the time-only form the first
      ## group holds the hour, not a year.
      my ($first, $month, $day, $date_hour, $date_minute, $date_second,
          $tz_hour, $tz_minute, $time_minute, $time_second)
          = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
      my ($hour, $minute, $second);

      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        my $year = $first;
        $report->('dateortime:syntax error')
            if length ($year) != 4 or length ($month) != 2 or
               length ($day) != 2 or
               length ($date_hour) != 2 or length ($date_minute) != 2;

        if (1 <= $month and $month <= 12) {
          ## Index 0 is a placeholder; February is given 29 days here
          ## and the leap-year rule is applied separately below.
          my @days_in = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
          $report->('datetime:bad day')
              if $day < 1 or $day > $days_in[$month];
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and
                 not ($year % 400 == 0 or
                      ($year % 4 == 0 and $year % 100 != 0));
        } else {
          $report->('datetime:bad month');
        }

        ($hour, $minute, $second)
            = ($date_hour, $date_minute, $date_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          $report->('dateortime:syntax error')
              if length ($tz_hour) != 2 or length ($tz_minute) != 2;
          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        $report->('dateortime:syntax error')
            if length ($first) != 2 or length ($time_minute) != 2;

        ($hour, $minute, $second) = ($first, $time_minute, $time_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: Integer part of second don't have to have length of two.

        ## A seconds field that starts with the decimal point has no
        ## integer part at all.
        $report->('dateortime:syntax error')
            if substr ($second, 0, 1) eq '.';

        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3557    
## HTML |meter| element.
## TODO: "The recommended way of giving the value is to include it as contents of the element"
## TODO: value inequalities (HTML5 revision 1463)
## TODO: content checking
## TODO: content or value must contain number (rev 2053)
$Element->{$HTML_NS}->{meter} = do {
  ## Each numeric attribute gets its own floating-point checker; the
  ## callback accepts every value (it always returns true).
  my %attr_checker = map {
    ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }))
  } qw/value min low high max optimum/;

  ## Attribute name => feature status; the six numeric attributes all
  ## share the same status and override the shared table.
  my %attr_status = (
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_DEFAULT) }
         qw/high low max min optimum value/),
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
};
3581    
## HTML |progress| element.
## TODO: recommended to use content
$Element->{$HTML_NS}->{progress} = do {
  ## Callbacks handed to the floating-point checker factory: |value|
  ## must be zero or more, |max| must be strictly positive.
  my %attr_checker = (
    value => $GetHTMLFloatingPointNumberAttrChecker->(sub {
      my ($number) = @_;
      $number >= 0;
    }),
    max => $GetHTMLFloatingPointNumberAttrChecker->(sub {
      my ($number) = @_;
      $number > 0;
    }),
  );

  ## Attribute name => feature status.
  my %attr_status = (
    %HTMLAttrStatus,
    max   => FEATURE_HTML5_DEFAULT,
    value => FEATURE_HTML5_DEFAULT,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
};
3594    
## HTML |code| element: shared phrasing-content callbacks plus an
## attribute checker with no element-specific value checkers.
$Element->{$HTML_NS}->{code} = do {
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3605    
## HTML |var| element: shared phrasing-content callbacks plus an
## attribute checker with no element-specific value checkers.
$Element->{$HTML_NS}->{var} = do {
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3616    
## HTML |samp| element: shared phrasing-content callbacks plus an
## attribute checker with no element-specific value checkers.
$Element->{$HTML_NS}->{samp} = do {
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3627    
## HTML |kbd| element: shared phrasing-content callbacks plus an
## attribute checker with no element-specific value checkers.
$Element->{$HTML_NS}->{kbd} = do {
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3638    
## HTML |sub| and |sup| elements.  Both names are bound to the very same
## definition hash (an alias, not a copy).
{
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdapref => FEATURE_HTML2X_RFC,
  );

  my $definition = {
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };

  $Element->{$HTML_NS}->{$_} = $definition for (qw/sub sup/);
}
3651 wakaba 1.1
## HTML |span| element: shared phrasing-content callbacks plus an
## attribute checker with no element-specific value checkers.
$Element->{$HTML_NS}->{span} = do {
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    lang         => FEATURE_HTML5_REC,
    sdaform      => FEATURE_HTML2X_RFC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3665    
## HTML |i| and |b| elements.  Both names are bound to the very same
## definition hash (an alias, not a copy).
{
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  my $definition = {
    %HTMLPhrasingContentChecker,
    status      => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };

  $Element->{$HTML_NS}->{$_} = $definition for (qw/i b/);
}
3678    
## HTML |tt| element.  Its status carries only FEATURE_M12N10_REC,
## with no HTML5 flag.
$Element->{$HTML_NS}->{tt} = do {
  ## Attribute name => feature status; element-specific rows come last.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang    => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status      => FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3689 wakaba 1.51
## HTML |s|, |strike| and |u| elements.  All three names are bound to
## the very same definition hash (aliases, not copies).
{
  ## Attribute name => feature status; |lang| overrides the shared rows.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  );

  my $definition = {
    %HTMLPhrasingContentChecker,
    status      => FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };

  $Element->{$HTML_NS}->{$_} = $definition for (qw/s strike u/);
}
3703    
## HTML |bdo| element.
##
## check_attrs runs the ordinary attribute checks and then additionally
## requires the |dir| attribute to be present.
$Element->{$HTML_NS}->{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => do {
    ## Built once, when the element table is set up, as every sibling
    ## element does -- not again for each |bdo| element being checked.
    my $check_attrs = $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_REC,
      id => FEATURE_HTML5_REC,
      style => FEATURE_HTML5_REC,
      title => FEATURE_HTML5_REC,
      lang => FEATURE_HTML5_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    });

    sub {
      my ($self, $item, $element_state) = @_;
      $check_attrs->($self, $item, $element_state);
      unless ($item->{node}->has_attribute_ns (undef, 'dir')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => 'dir',
                           level => $self->{level}->{must});
      }
    };
  },
  ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
};
3729    
3730 wakaba 1.99 $Element->{$HTML_NS}->{ruby} = {
3731     %HTMLPhrasingContentChecker,
3732 wakaba 1.187 status => FEATURE_HTML5_WD | FEATURE_RUBY_REC,
3733 wakaba 1.99 check_attrs => $GetHTMLAttrsChecker->({}, {
3734     %HTMLAttrStatus,
3735     %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
3736 wakaba 1.187 lang => FEATURE_HTML5_REC,
3737 wakaba 1.99 }),
3738     check_start => sub {
3739     my ($self, $item, $element_state) = @_;
3740    
3741     $element_state->{phase} = 'before-rb';
3742     #$element_state->{has_sig}
3743 wakaba 1.100
3744     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3745     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3746 wakaba 1.99 },
3747     ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
3748     check_child_element => sub {
3749     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3750     $child_is_transparent, $element_state) = @_;
3751 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
3752     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
3753 wakaba 1.99 $self->{onerror}->(node => $child_el,
3754     type => 'element not allowed:minus',
3755 wakaba 1.104 level => $self->{level}->{must});
3756 wakaba 1.99 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3757     #
3758     } elsif ($element_state->{phase} eq 'before-rb') {
3759     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3760     $element_state->{phase} = 'in-rb';
3761     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3762     $self->{onerror}->(node => $child_el,
3763 wakaba 1.104 level => $self->{level}->{should},
3764     type => 'no significant content before');
3765 wakaba 1.99 $element_state->{phase} = 'after-rt';
3766     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3767     $self->{onerror}->(node => $child_el,
3768 wakaba 1.104 level => $self->{level}->{should},
3769     type => 'no significant content before');
3770 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3771     } else {
3772     $self->{onerror}->(node => $child_el,
3773 wakaba 1.104 type => 'element not allowed:ruby base',
3774     level => $self->{level}->{must});
3775 wakaba 1.99 $element_state->{phase} = 'in-rb';
3776     }
3777     } elsif ($element_state->{phase} eq 'in-rb') {
3778     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3779     #$element_state->{phase} = 'in-rb';
3780     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3781     unless ($element_state->{has_significant}) {
3782     $self->{onerror}->(node => $child_el,
3783 wakaba 1.104 level => $self->{level}->{should},
3784     type => 'no significant content before');
3785 wakaba 1.99 }
3786     $element_state->{phase} = 'after-rt';
3787     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3788     unless ($element_state->{has_significant}) {
3789     $self->{onerror}->(node => $child_el,
3790 wakaba 1.104 level => $self->{level}->{should},
3791     type => 'no significant content before');
3792 wakaba 1.99 }
3793     $element_state->{phase} = 'after-rp1';
3794     } else {
3795     $self->{onerror}->(node => $child_el,
3796 wakaba 1.104 type => 'element not allowed:ruby base',
3797     level => $self->{level}->{must});
3798 wakaba 1.99 #$element_state->{phase} = 'in-rb';
3799     }
3800     } elsif ($element_state->{phase} eq 'after-rt') {
3801     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3802     if ($element_state->{has_significant}) {
3803     $element_state->{has_sig} = 1;
3804     delete $element_state->{has_significant};
3805     }
3806     $element_state->{phase} = 'in-rb';
3807     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3808     $self->{onerror}->(node => $child_el,
3809 wakaba 1.104 level => $self->{level}->{should},
3810     type => 'no significant content before');
3811 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3812     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3813     $self->{onerror}->(node => $child_el,
3814 wakaba 1.104 level => $self->{level}->{should},
3815     type => 'no significant content before');
3816 wakaba 1.99 #$element_state->{phase} = 'after-rt';
3817     } else {
3818     $self->{onerror}->(node => $child_el,
3819 wakaba 1.104 type => 'element not allowed:ruby base',
3820     level => $self->{level}->{must});
3821 wakaba 1.99 if ($element_state->{has_significant}) {
3822     $element_state->{has_sig} = 1;
3823     delete $element_state->{has_significant};
3824     }
3825     $element_state->{phase} = 'in-rb';
3826     }
3827     } elsif ($element_state->{phase} eq 'after-rp1') {
3828     if ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3829     $element_state->{phase} = 'after-rp-rt';
3830     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3831     $self->{onerror}->(node => $child_el,
3832 wakaba 1.104 type => 'ps element missing',
3833     text => 'rt',
3834     level => $self->{level}->{must});
3835 wakaba 1.99 $element_state->{phase} = 'after-rp2';
3836     } else {
3837     $self->{onerror}->(node => $child_el,
3838 wakaba 1.104 type => 'ps element missing',
3839     text => 'rt',
3840     level => $self->{level}->{must});
3841 wakaba 1.99 $self->{onerror}->(node => $child_el,
3842 wakaba 1.104 type => 'ps element missing',
3843     text => 'rp',
3844     level => $self->{level}->{must});
3845 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3846     $self->{onerror}->(node => $child_el,
3847 wakaba 1.104 type => 'element not allowed:ruby base',
3848     level => $self->{level}->{must});
3849 wakaba 1.99 }
3850     if ($element_state->{has_significant}) {
3851     $element_state->{has_sig} = 1;
3852     delete $element_state->{has_significant};
3853     }
3854     $element_state->{phase} = 'in-rb';
3855     }
3856     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3857     if ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3858     $element_state->{phase} = 'after-rp2';
3859     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3860     $self->{onerror}->(node => $child_el,
3861 wakaba 1.104 type => 'ps element missing',
3862     text => 'rp',
3863     level => $self->{level}->{must});
3864 wakaba 1.99 $self->{onerror}->(node => $child_el,
3865 wakaba 1.104 level => $self->{level}->{should},
3866     type => 'no significant content before');
3867 wakaba 1.99 $element_state->{phase} = 'after-rt';
3868     } else {
3869     $self->{onerror}->(node => $child_el,
3870 wakaba 1.104 type => 'ps element missing',
3871     text => 'rp',
3872     level => $self->{level}->{must});
3873 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3874     $self->{onerror}->(node => $child_el,
3875 wakaba 1.104 type => 'element not allowed:ruby base',
3876     level => $self->{level}->{must});
3877 wakaba 1.99 }
3878     if ($element_state->{has_significant}) {
3879     $element_state->{has_sig} = 1;
3880     delete $element_state->{has_significant};
3881     }
3882     $element_state->{phase} = 'in-rb';
3883     }
3884     } elsif ($element_state->{phase} eq 'after-rp2') {
3885     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3886     if ($element_state->{has_significant}) {
3887     $element_state->{has_sig} = 1;
3888     delete $element_state->{has_significant};
3889     }
3890     $element_state->{phase} = 'in-rb';
3891     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3892     $self->{onerror}->(node => $child_el,
3893 wakaba 1.104 level => $self->{level}->{should},
3894     type => 'no significant content before');
3895 wakaba 1.99 $element_state->{phase} = 'after-rt';
3896     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3897     $self->{onerror}->(node => $child_el,
3898 wakaba 1.104 level => $self->{level}->{should},
3899     type => 'no significant content before');
3900 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3901     } else {
3902     $self->{onerror}->(node => $child_el,
3903 wakaba 1.104 type => 'element not allowed:ruby base',
3904     level => $self->{level}->{must});
3905 wakaba 1.99 if ($element_state->{has_significant}) {
3906     $element_state->{has_sig} = 1;
3907     delete $element_state->{has_significant};
3908     }
3909     $element_state->{phase} = 'in-rb';
3910     }
3911     } else {
3912     die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
3913     }
3914     },
check_child_text => sub {
  ## Text child of |ruby|.  Significant text always begins (or continues)
  ## a ruby base; depending on the current phase, one or more ruby
  ## annotation elements might be reported as missing first.
  my ($self, $item, $child_node, $has_significant, $element_state) = @_;
  return unless $has_significant;

  ## Elements that should have appeared before this text, keyed by phase.
  my $missing_lns = {
    'before-rb' => [],
    'in-rb' => [],
    'after-rt' => [],
    'after-rp2' => [],
    'after-rp1' => ['rt', 'rp'],
    'after-rp-rt' => ['rp'],
  }->{$element_state->{phase}}
      or die "check_child_text: Bad |ruby| phase: $element_state->{phase}";

  for my $missing_ln (@$missing_lns) {
    $self->{onerror}->(node => $child_node,
                       type => 'ps element missing',
                       text => $missing_ln,
                       level => $self->{level}->{must});
  }

  $element_state->{phase} = 'in-rb';
},
check_end => sub {
  ## End-of-element check for |ruby|.  Reports what is still missing
  ## according to the phase reached by the child checks, then propagates
  ## the "has significant content" flag to the real parent.
  ##
  ## Arguments: the checker object, the item being checked (|node| and
  ## |real_parent_state| are used) and the element state of the |ruby|.
  my ($self, $item, $element_state) = @_;
  $self->_remove_minus_elements ($element_state);

  if ($element_state->{phase} eq 'before-rb') {
    ## No child has moved the state machine yet: no base and no |rt|.
    $self->{onerror}->(node => $item->{node},
                       level => $self->{level}->{should},
                       type => 'no significant content');
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($element_state->{phase} eq 'in-rb') {
    ## A ruby base is open but has not been followed by an |rt|.
    unless ($element_state->{has_significant}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content at the end');
    }
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($element_state->{phase} eq 'after-rt' or
           $element_state->{phase} eq 'after-rp2') {
    ## Complete ruby segment; nothing to report.
    #
  } elsif ($element_state->{phase} eq 'after-rp1') {
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } elsif ($element_state->{phase} eq 'after-rp-rt') {
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } else {
    ## The message names this callback (it used to say |check_child_text|,
    ## which pointed at the wrong code when debugging).
    die "check_end: Bad |ruby| phase: $element_state->{phase}";
  }

  ## NOTE: A modified version of |check_end| of %AnyChecker.
  ## |has_sig| remembers significant content seen in earlier ruby bases.
  if ($element_state->{has_significant} or $element_state->{has_sig}) {
    $item->{real_parent_state}->{has_significant} = 1;
  }
},
3994     };
3995    
## |rt| and |rp| share exactly the same definition: phrasing content
## with the common attribute set.  Each element still gets its own
## attribute checker instance.
for my $ruby_annotation_ln (qw/rt rp/) {
  $Element->{$HTML_NS}->{$ruby_annotation_ln} = {
    %HTMLPhrasingContentChecker,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    }),
    status => FEATURE_HTML5_WD | FEATURE_RUBY_REC,
  };
} # rt, rp
4015 wakaba 1.99
4016 wakaba 1.29 =pod
4017    
4018     ## TODO:
4019    
4020     +
4021     + <p>Partly because of the confusion described above, authors are
4022     + strongly recommended to always mark up all paragraphs with the
4023     + <code>p</code> element, and to not have any <code>ins</code> or
4024     + <code>del</code> elements that cross across any <span
4025     + title="paragraph">implied paragraphs</span>.</p>
4026     +
4027     (An informative note)
4028    
4029     <p><code>ins</code> elements should not cross <span
4030     + title="paragraph">implied paragraph</span> boundaries.</p>
4031     (normative)
4032    
4033     + <p><code>del</code> elements should not cross <span
4034     + title="paragraph">implied paragraph</span> boundaries.</p>
4035     (normative)
4036    
4037     =cut
4038    
## |ins|: transparent content model with the |cite| and |datetime|
## attributes.
$Element->{$HTML_NS}->{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Declare how URL-valued attributes of this element are used:
    ## [attribute name => usage type].
    $element_state->{uri_info}->{$_->[0]}->{type}->{$_->[1]} = 1
        for [cite => 'cite'], [template => 'resource'], [ref => 'resource'];
  },
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
    datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
};
4060    
## |del|: transparent content model with the |cite| and |datetime|
## attributes.  Unlike the generic transparent checker, significant
## content inside |del| is not reported to the parent.
$Element->{$HTML_NS}->{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Declare how URL-valued attributes of this element are used:
    ## [attribute name => usage type].
    $element_state->{uri_info}->{$_->[0]}->{type}->{$_->[1]} = 1
        for [cite => 'cite'], [template => 'resource'], [ref => 'resource'];
  },
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
    datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate.  An error is only
    ## reported when the element has no significant content and is not
    ## being treated as transparent.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
};
4094    
## |figure|: flow content with at most one |legend|, which has to be
## either the first or the last child.
$Element->{$HTML_NS}->{figure} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  ## NOTE: legend, Flow | Flow, legend?
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Explicitly excluded interactive content.
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{$_} = 1 for qw/has_non_legend has_non_table/;
      return;
    }

    ## Explicitly included elements.
    if ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_table} = 1;
      return;
    }

    ## Ordinary (non-|legend|) content.
    unless ($child_ln eq 'legend' and $child_nsuri eq $HTML_NS) {
      my $counter_key = ($child_ln eq 'table' and $child_nsuri eq $HTML_NS)
          ? 'has_table' : 'has_non_table';
      $element_state->{$counter_key}++;
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
      return;
    }

    ## |legend| child.
    if ($element_state->{has_legend_at_first}) {
      ## A leading |legend| exists; any later one is an error.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    } elsif ($element_state->{has_legend}) {
      ## The previously remembered |legend| turned out not to be last.
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
      $element_state->{has_legend} = $child_el;
    } elsif ($element_state->{has_non_legend}) {
      ## Candidate for the trailing |legend|.
      $element_state->{has_legend} = $child_el;
    } else {
      $element_state->{has_legend_at_first} = 1;
    }
    delete $element_state->{has_non_legend};
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    if ($has_significant) {
      $element_state->{has_non_table}++;
      $element_state->{has_non_legend} = 1;
    }

    $element_state->{in_figure} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A non-leading |legend| followed by other content is misplaced.
    if (not $element_state->{has_legend_at_first} and
        $element_state->{has_legend} and
        $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    }

    ## The figure consists of exactly one |table| and nothing else, and
    ## a table caption element has been recorded in the element state.
    if (($element_state->{has_table} || 0) == 1 and
        not $element_state->{has_non_table} and
        $element_state->{table_caption_element}) {
      $self->{onerror}->(node => $element_state->{table_caption_element},
                         type => 'element not allowed',
                         level => $self->{level}->{should});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
4171 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
4172 wakaba 1.1
## Attribute checker for attributes whose validation is not implemented:
## the attribute is reported as unknown at the "uncertain" level.
my $AttrCheckerNotImplemented = sub {
  my ($checker, $unchecked_attr) = @_;
  my @report = (node => $unchecked_attr,
                type => 'unknown attribute',
                level => $checker->{level}->{uncertain});
  $checker->{onerror}->(@report);
};
4179    
## |img|: empty element; |src| is required and |alt| is recommended.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_known_attrs = $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      alt => sub { }, ## NOTE: No syntactical requirement
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        ## |ismap| is reported unless the |in_a_href| flag is set.
        unless ($self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{level}->{must});
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      longdesc => $HTMLURIAttrChecker,
      ## TODO: HTML4 |name|
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      ## Presentational attributes deprecated in M12N 1.0.
      align => FEATURE_M12N10_REC_DEPRECATED,
      border => FEATURE_M12N10_REC_DEPRECATED,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_M12N10_REC_DEPRECATED,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      ## Attributes in HTML5 and M12N 1.0.
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      ## Attributes also in XHTML2.
      ismap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      src => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      ## Others.
      lang => FEATURE_HTML5_REC,
      longdesc => FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
    });
    $check_known_attrs->($self, $item, $element_state);

    ## Attributes that are expected to be present:
    ## [attribute name => error level key].
    ## TODO: ... (the |alt| requirement is not fully implemented)
    for my $expected (['alt', 'should'], ['src', 'must']) {
      my ($expected_ln, $level_key) = @$expected;
      next if $item->{node}->has_attribute_ns (undef, $expected_ln);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $expected_ln,
                         level => $self->{level}->{$level_key});
    }

    ## TODO: external resource check

    $element_state->{uri_info}->{$_}->{type}->{embedded} = 1
        for qw/src lowsrc dynsrc/;
    $element_state->{uri_info}->{longdesc}->{type}->{cite} = 1;
  },
};
4248    
## |iframe|: nested browsing context.  The element content is text only.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    name => $HTMLBrowsingContextNameAttrChecker,
    sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
      'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
    }),
    ## NOTE: The HTML5 attribute is spelled |seamless|.  It used to be
    ## registered here as |seemless|, so the real attribute was never
    ## checked as a boolean attribute.
    seamless => $GetHTMLBooleanAttrChecker->('seamless'),
    src => $HTMLURIAttrChecker,
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sandbox => FEATURE_HTML5_WD,
    scrolling => FEATURE_M12N10_REC,
    seamless => FEATURE_HTML5_WD,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_REC,
    width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Declare how URL-valued attributes of this element are used.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
4290    
## |embed|: empty element.  Besides the attributes known to the checker,
## any lowercase, non-|xml|-prefixed local attribute is allowed.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status of the attributes defined for |embed|, by local name.
    my %embed_attr_status = (
      %HTMLAttrStatus,
      height => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_WD,
      type => FEATURE_HTML5_WD,
      width => FEATURE_HTML5_LC,
    );
    my $saw_src;

    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;

      my $status = $embed_attr_status{$ln};
      my $checker;

      if ($ns eq '') {
        if ($ln eq 'src') {
          $saw_src = 1;
          $checker = $HTMLURIAttrChecker;
        } elsif ($ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } elsif ($ln eq 'width' or $ln eq 'height') {
          $checker = $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 });
        } elsif ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $ln !~ /[A-Z]/) {
          ## |data-*| attribute
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } elsif ($ln !~ /^[Xx][Mm][Ll]/ and
                 $ln !~ /[A-Z]/ and
                 $ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
          ## NOTE: Any local attribute is ok.
          $checker = $HTMLAttrChecker->{$ln} || sub { };
          $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
        } else {
          $checker = $HTMLAttrChecker->{$ln};
        }
      }

      ## Fall back to the generic per-namespace tables.
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif (length $ns or $status) {
        ## No checker is available.  (A null-namespace attribute without
        ## any status is left to the status report below.)
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    ## NOTE: <embed> without src="" is allowed since revision 1929.
    ## We issue an informational message since <embed> w/o src=""
    ## is likely an authoring error.
    $self->{onerror}->(node => $item->{node},
                       type => 'attribute missing',
                       text => 'src',
                       level => $self->{level}->{info})
        unless $saw_src;

    ## TODO: external resource check

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4368    
4369 wakaba 1.49 ## TODO:
4370     ## {applet} FEATURE_M12N10_REC_DEPRECATED
4371     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4372    
## |object|: zero or more |param| elements followed by transparent
## (flow) fallback content.  At least one of |data| and |type| has to be
## specified.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      archive => $HTMLSpaceURIsAttrChecker,
          ## TODO: Relative to @codebase
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      classid => $HTMLURIAttrChecker,
      codebase => $HTMLURIAttrChecker,
      codetype => $HTMLIMTAttrChecker,
          ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
      data => $HTMLURIAttrChecker,
      declare => $GetHTMLBooleanAttrChecker->('declare'),
          ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
          ## [HTML4] but we don't know how to test this.
      form => $HTMLFormAttrChecker,
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name => $HTMLBrowsingContextNameAttrChecker,
          ## NOTE: |name| attribute of the |object| element defines
          ## the name of the browsing context created by the element,
          ## if any, but is also used as the form control name of the
          ## form control provided by the plugin, if any.
      standby => sub {}, ## NOTE: %Text; in HTML4
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      'content-length' => FEATURE_XHTML2_ED,
      data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      form => FEATURE_HTML5_LC,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_REC,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## Neither |data| nor |type| is specified.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type',
                           level => $self->{level}->{must});
      }
    }

    ## Declare how URL-valued attributes of this element are used.
    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  ## NOTE: param*, transparent (Flow)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      ## A disallowed element still ends the leading run of |param|
      ## elements.  (This used to set |has_non_legend|, a flag that
      ## belongs to |figure| and is never read for |object|, so a |param|
      ## following such an element went unreported.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| is only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text ends the leading run of |param| elements.
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      ## No parent element and no significant content.
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
4490 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4491     ## What about |<section><object data><style scoped></style>x</object></section>|?
4492     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4493 wakaba 1.1
## |param|: empty element; both |name| and |value| are required.
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_known_attrs = $GetHTMLAttrsChecker->({
      name => sub { },
      type => $HTMLIMTAttrChecker,
      value => sub { },
      valuetype => $GetHTMLEnumeratedAttrChecker->({
        data => 1, ref => 1, object => 1,
      }),
    }, {
      %HTMLAttrStatus,
      href => FEATURE_RDFA_REC,
      id => FEATURE_HTML5_REC,
      ## Attributes in HTML5.
      name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      ## Attributes not in HTML5.
      type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    });
    $check_known_attrs->(@_);

    ## Required attributes, reported in this order.
    for my $required_ln (qw/name value/) {
      next if $item->{node}->has_attribute_ns (undef, $required_ln);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $required_ln,
                         level => $self->{level}->{must});
    }
  },
};
4529    
4530     $Element->{$HTML_NS}->{video} = { ## Checker definition for the HTML |video| element.
4531 wakaba 1.40 %HTMLTransparentChecker,
4532 wakaba 1.48 status => FEATURE_HTML5_LC,
4533 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4534 wakaba 1.1 src => $HTMLURIAttrChecker,
4535     ## TODO: start, loopstart, loopend, end
4536     ## ISSUE: they MUST be "value time offset"s. Value?
4537 wakaba 1.11 ## ISSUE: playcount has no conformance criteria
4538 wakaba 1.183 autobuffer => $GetHTMLBooleanAttrChecker->('autobuffer'),
4539 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
4540     controls => $GetHTMLBooleanAttrChecker->('controls'),
4541 wakaba 1.59 poster => $HTMLURIAttrChecker,
4542 wakaba 1.178 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4543     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4544 wakaba 1.50 }, { ## Second hash: per-attribute feature status flags.
4545     %HTMLAttrStatus,
4546 wakaba 1.183 autobuffer => FEATURE_HTML5_LC,
4547 wakaba 1.50 autoplay => FEATURE_HTML5_LC,
4548     controls => FEATURE_HTML5_LC,
4549 wakaba 1.153 end => FEATURE_HTML5_AT_RISK,
4550 wakaba 1.50 height => FEATURE_HTML5_LC,
4551 wakaba 1.153 loopend => FEATURE_HTML5_AT_RISK,
4552     loopstart => FEATURE_HTML5_AT_RISK,
4553     playcount => FEATURE_HTML5_AT_RISK,
4554 wakaba 1.50 poster => FEATURE_HTML5_LC,
4555     src => FEATURE_HTML5_LC,
4556 wakaba 1.153 start => FEATURE_HTML5_AT_RISK,
4557 wakaba 1.50 width => FEATURE_HTML5_LC,
4558 wakaba 1.1 }),
4559 wakaba 1.42 check_start => sub {
4560     my ($self, $item, $element_state) = @_;
4561     $element_state->{allow_source} ## True only if there is no |src| attribute.
4562     = not $item->{node}->has_attribute_ns (undef, 'src');
4563     $element_state->{has_source} ||= $element_state->{allow_source} * -1; ## -1: |source| is allowed but none has been seen yet.
4564     ## NOTE: It might be set true by |check_element|.
4565 wakaba 1.66
4566     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4567     $element_state->{uri_info}->{poster}->{type}->{embedded} = 1;
4568 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4569     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4570 wakaba 1.42 },
4571     check_child_element => sub {
4572     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4573     $child_is_transparent, $element_state) = @_;
4574 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
4575     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
4576 wakaba 1.42 $self->{onerror}->(node => $child_el,
4577     type => 'element not allowed:minus',
4578 wakaba 1.104 level => $self->{level}->{must});
4579 wakaba 1.42 delete $element_state->{allow_source};
4580     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4581     #
4582     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
4583 wakaba 1.45 unless ($element_state->{allow_source}) { ## |src| is present, or other content has already been seen.
4584 wakaba 1.104 $self->{onerror}->(node => $child_el,
4585 wakaba 1.72 type => 'element not allowed:flow',
4586 wakaba 1.104 level => $self->{level}->{must});
4587 wakaba 1.42 }
4588 wakaba 1.45 $element_state->{has_source} = 1;
4589 wakaba 1.1 } else {
4590 wakaba 1.42 delete $element_state->{allow_source}; ## Any later |source| child is reported as not allowed.
4591 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
4592 wakaba 1.42 }
4593     },
4594     check_child_text => sub {
4595     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
4596     if ($has_significant) {
4597     delete $element_state->{allow_source}; ## Any later |source| child is reported as not allowed.
4598     }
4599 wakaba 1.72 $HTMLFlowContentChecker{check_child_text}->(@_);
4600 wakaba 1.42 },
4601     check_end => sub {
4602     my ($self, $item, $element_state) = @_;
4603     if ($element_state->{has_source} == -1) { ## Still -1: no |source| child was found.
4604     $self->{onerror}->(node => $item->{node},
4605 wakaba 1.104 type => 'child element missing',
4606     text => 'source',
4607     level => $self->{level}->{must});
4608 wakaba 1.1 }
4609 wakaba 1.42
4610     $Element->{$HTML_NS}->{object}->{check_end}->(@_); ## Reuses the |object| element's |check_end| handler.
4611 wakaba 1.1 },
4612     };
4613    
4614     $Element->{$HTML_NS}->{audio} = { ## Checker definition for the HTML |audio| element.
4615 wakaba 1.40 %{$Element->{$HTML_NS}->{video}}, ## Starts from a copy of the |video| definition; |status| and |check_attrs| are overridden below.
4616 wakaba 1.48 status => FEATURE_HTML5_LC,
4617 wakaba 1.42 check_attrs => $GetHTMLAttrsChecker->({
4618     src => $HTMLURIAttrChecker,
4619     ## TODO: start, loopstart, loopend, end
4620     ## ISSUE: they MUST be "value time offset"s. Value?
4621     ## ISSUE: playcount has no conformance criteria
4622 wakaba 1.183 autobuffer => $GetHTMLBooleanAttrChecker->('autobuffer'),
4623 wakaba 1.42 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
4624     controls => $GetHTMLBooleanAttrChecker->('controls'),
4625 wakaba 1.50 }, { ## Second hash: per-attribute feature status flags.
4626     %HTMLAttrStatus,
4627 wakaba 1.183 autobuffer => FEATURE_HTML5_LC,
4628 wakaba 1.50 autoplay => FEATURE_HTML5_LC,
4629     controls => FEATURE_HTML5_LC,
4630 wakaba 1.153 end => FEATURE_HTML5_AT_RISK,
4631     loopend => FEATURE_HTML5_AT_RISK,
4632     loopstart => FEATURE_HTML5_AT_RISK,
4633     playcount => FEATURE_HTML5_AT_RISK,
4634 wakaba 1.50 src => FEATURE_HTML5_LC,
4635 wakaba 1.153 start => FEATURE_HTML5_AT_RISK,
4636 wakaba 1.42 }),
4637 wakaba 1.1 };
4638    
4639     $Element->{$HTML_NS}->{source} = { ## Checker definition for the HTML |source| element.
4640 wakaba 1.40 %HTMLEmptyChecker,
4641 wakaba 1.153 status => FEATURE_HTML5_LC,
4642 wakaba 1.40 check_attrs => sub {
4643     my ($self, $item, $element_state) = @_;
4644 wakaba 1.1 $GetHTMLAttrsChecker->({
4645 wakaba 1.90 media => $HTMLMQAttrChecker,
4646     pixelratio => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
4647     src => $HTMLURIAttrChecker, ## ISSUE (about |pixelratio| above): Negative or zero pixelratio=""
4648 wakaba 1.1 type => $HTMLIMTAttrChecker,
4649 wakaba 1.50 }, { ## Second hash: per-attribute feature status flags.
4650     %HTMLAttrStatus,
4651 wakaba 1.153 media => FEATURE_HTML5_LC,
4652     pixelratio => FEATURE_HTML5_LC,
4653     src => FEATURE_HTML5_LC,
4654     type => FEATURE_HTML5_LC,
4655 wakaba 1.66 })->(@_);
4656 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'src')) { ## |src| is required.
4657     $self->{onerror}->(node => $item->{node},
4658 wakaba 1.104 type => 'attribute missing',
4659     text => 'src',
4660     level => $self->{level}->{must});
4661 wakaba 1.1 }
4662 wakaba 1.66
4663     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4664 wakaba 1.1 },
4665     };
4666    
4667     $Element->{$HTML_NS}->{canvas} = { ## Checker definition for the HTML |canvas| element.
4668 wakaba 1.40 %HTMLTransparentChecker,
4669 wakaba 1.187 status => FEATURE_HTML5_REC,
4670 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4671 wakaba 1.1 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4672     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4673 wakaba 1.50 }, { ## Second hash: per-attribute feature status flags.
4674     %HTMLAttrStatus,
4675 wakaba 1.187 height => FEATURE_HTML5_REC,
4676     width => FEATURE_HTML5_REC,
4677 wakaba 1.1 }),
4678 wakaba 1.178
4679     # Authors MUST provide alternative content (HTML5 revision 2868) -
4680     # This requirement cannot be checked, since the alternative content
4681     # might be placed outside of the element.
4682     }; # canvas
4683 wakaba 1.1
4684     $Element->{$HTML_NS}->{map} = { ## Checker definition for the HTML |map| element.
4685 wakaba 1.72 %HTMLFlowContentChecker,
4686 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4687 wakaba 1.40 check_attrs => sub {
4688     my ($self, $item, $element_state) = @_;
4689 wakaba 1.100 my $has_name; ## Set to [value, attribute node] when a |name| attribute is seen.
4690 wakaba 1.4 $GetHTMLAttrsChecker->({
4691 wakaba 1.100 name => sub {
4692     my ($self, $attr) = @_;
4693     my $value = $attr->value;
4694     if (length $value) {
4695     ## NOTE: Duplication is not non-conforming.
4696     ## NOTE: Space characters are not non-conforming.
4697     #
4698     } else {
4699     $self->{onerror}->(node => $attr,
4700     type => 'empty attribute value',
4701 wakaba 1.104 level => $self->{level}->{must});
4702 wakaba 1.100 }
4703 wakaba 1.4 $self->{map}->{$value} ||= $attr; ## Records the first |name| attribute node seen for each value.
4704 wakaba 1.100 $has_name = [$value, $attr];
4705 wakaba 1.4 },
4706 wakaba 1.49 }, { ## Second hash: per-attribute feature status flags.
4707     %HTMLAttrStatus,
4708 wakaba 1.187 class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4709     dir => FEATURE_HTML5_REC,
4710     id => FEATURE_HTML5_REC,
4711     lang => FEATURE_HTML5_REC,
4712 wakaba 1.153 #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
4713     name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4714 wakaba 1.50 onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4715     ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4716     onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4717     onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4718     onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4719     onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4720     onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4721     onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4722     onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4723     onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4724 wakaba 1.187 title => FEATURE_HTML5_REC,
4725 wakaba 1.66 })->(@_);
4726 wakaba 1.100
4727 wakaba 1.135 if ($has_name) {
4728 wakaba 1.145 my $id = $item->{node}->get_attribute_ns (undef, 'id');
4729 wakaba 1.135 if (defined $id and $has_name->[0] ne $id) { ## When both are present, |id| must equal |name|.
4730 wakaba 1.155 $self->{onerror}
4731     ->(node => $item->{node}->get_attribute_node_ns (undef, 'id'),
4732     type => 'id ne name',
4733     level => $self->{level}->{must});
4734 wakaba 1.100 }
4735 wakaba 1.135 } else { ## |name| is required.
4736 wakaba 1.100 $self->{onerror}->(node => $item->{node},
4737 wakaba 1.104 type => 'attribute missing',
4738     text => 'name',
4739     level => $self->{level}->{must});
4740 wakaba 1.100 }
4741 wakaba 1.4 },
4742 wakaba 1.59 check_start => sub {
4743     my ($self, $item, $element_state) = @_;
4744     $element_state->{in_map_original} = $self->{flag}->{in_map}; ## Saved so that |check_end| can restore it.
4745 wakaba 1.137 $self->{flag}->{in_map} = [@{$self->{flag}->{in_map} or []}, {}];
4746     ## NOTE: |{in_map}| is a reference to the array which contains
4747     ## hash references. Hashes are corresponding to the opening
4748     ## |map| elements and each of them contains the key-value
4749     ## pairs corresponding to the absolute URLs for the processed
4750     ## |area| elements in the |map| element corresponding to the
4751     ## hash. The key represents the resource (## TODO: use
4752     ## absolute URL), while the value represents whether there is
4753     ## an |area| element whose |alt| attribute is specified to a
4754     ## non-empty value. If there IS such an |area| element for
4755     ## the resource specified by the key, then the value is set to
4756     ## zero (|0|). Otherwise, if there is no such |area|
4757     ## element but there is any |area| element with the empty
4758     ## |alt=""| attribute, then the value contains an array
4759     ## reference that contains all of such |area| elements.
4760 wakaba 1.79
4761     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4762     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4763 wakaba 1.59 },
4764     check_end => sub {
4765     my ($self, $item, $element_state) = @_;
4766 wakaba 1.137
4767     for (keys %{$self->{flag}->{in_map}->[-1]}) { ## Entries recorded for this |map| element.
4768     my $nodes = $self->{flag}->{in_map}->[-1]->{$_};
4769     next unless $nodes; ## 0: a non-empty |alt| was seen for this key.
4770     for (@$nodes) {
4771     $self->{onerror}->(type => 'empty area alt',
4772     node => $_,
4773     level => $self->{level}->{html5_no_may});
4774     }
4775     }
4776    
4777     $self->{flag}->{in_map} = $element_state->{in_map_original};
4778    
4779 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
4780 wakaba 1.59 },
4781 wakaba 1.1 };
4782    
4783     $Element->{$HTML_NS}->{area} = { ## Checker definition for the HTML |area| element.
4784 wakaba 1.40 %HTMLEmptyChecker,
4785 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4786 wakaba 1.40 check_attrs => sub {
4787     my ($self, $item, $element_state) = @_;
4788 wakaba 1.1 my %attr; ## Null-namespace attributes that have an |area|-specific checker, keyed by local name.
4789     my $coords; ## Array reference of |coords| values; stays undefined on a syntax error.
4790 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
4791 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
4792     $attr_ns = '' unless defined $attr_ns;
4793     my $attr_ln = $attr->manakai_local_name;
4794     my $checker;
4795 wakaba 1.73 my $status;
4796 wakaba 1.1 if ($attr_ns eq '') {
4797 wakaba 1.73 $status = { ## Feature status flags for this attribute, looked up by local name.
4798     %HTMLAttrStatus,
4799     %HTMLM12NCommonAttrStatus,
4800 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
4801 wakaba 1.153 alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4802     coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4803 wakaba 1.154 href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
4804 wakaba 1.153 hreflang => FEATURE_HTML5_WD,
4805 wakaba 1.187 lang => FEATURE_HTML5_REC,
4806 wakaba 1.154 media => FEATURE_HTML5_WD,
4807 wakaba 1.73 nohref => FEATURE_M12N10_REC,
4808     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4809     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4810 wakaba 1.153 ping => FEATURE_HTML5_WD,
4811 wakaba 1.154 rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
4812 wakaba 1.153 shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4813 wakaba 1.73 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4814 wakaba 1.153 target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4815     type => FEATURE_HTML5_WD,
4816 wakaba 1.73 }->{$attr_ln};
4817    
4818 wakaba 1.1 $checker = { ## |area|-specific attribute checkers, looked up by local name.
4819 wakaba 1.153 alt => sub {
4820     ## NOTE: Checked later.
4821     },
4822 wakaba 1.1 shape => $GetHTMLEnumeratedAttrChecker->({
4823     circ => -1, circle => 1,
4824     default => 1,
4825     poly => 1, polygon => -1,
4826     rect => 1, rectangle => -1,
4827     }),
4828     coords => sub { ## Syntax: comma-separated list of integers, each with an optional leading "-".
4829     my ($self, $attr) = @_;
4830     my $value = $attr->value;
4831     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
4832     $coords = [split /,/, $value];
4833     } else {
4834     $self->{onerror}->(node => $attr,
4835 wakaba 1.104 type => 'coords:syntax error',
4836     level => $self->{level}->{must});
4837 wakaba 1.1 }
4838     },
4839 wakaba 1.70 nohref => $GetHTMLBooleanAttrChecker->('nohref'),
4840     target => $HTMLTargetAttrChecker,
4841 wakaba 1.1 href => $HTMLURIAttrChecker,
4842     ping => $HTMLSpaceURIsAttrChecker,
4843 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
4844 wakaba 1.1 media => $HTMLMQAttrChecker,
4845     hreflang => $HTMLLanguageTagAttrChecker,
4846     type => $HTMLIMTAttrChecker,
4847     }->{$attr_ln};
4848     if ($checker) {
4849     $attr{$attr_ln} = $attr; ## Remembered for the cross-attribute checks after the loop.
4850 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
4851     $attr_ln !~ /[A-Z]/) {
4852 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
4853     $status = $HTMLDatasetAttrStatus;
4854 wakaba 1.1 } else {
4855     $checker = $HTMLAttrChecker->{$attr_ln};
4856     }
4857     }
4858     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} ## Fall back to the per-namespace tables; key |''| is the namespace-wide default.
4859 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
4860     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
4861     || $AttrStatus->{$attr_ns}->{''};
4862     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
4863 wakaba 1.62
4864 wakaba 1.1 if ($checker) {
4865 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
4866 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
4867 wakaba 1.54 #
4868 wakaba 1.1 } else {
4869 wakaba 1.104 $self->{onerror}->(node => $attr,
4870     type => 'unknown attribute',
4871     level => $self->{level}->{uncertain});
4872 wakaba 1.1 ## ISSUE: No conformance criteria for unknown attributes in the spec
4873     }
4874 wakaba 1.49
4875 wakaba 1.82 $self->_attr_status_info ($attr, $status);
4876 wakaba 1.1 }
4877    
4878     if (defined $attr{href}) {
4879 wakaba 1.4 $self->{has_hyperlink_element} = 1;
4880 wakaba 1.137 if (defined $attr{alt}) {
4881     my $url = $attr{href}->value; ## TODO: resolve
4882     if (length $attr{alt}->value) {
4883     for (@{$self->{flag}->{in_map} or []}) {
4884     $_->{$url} = 0; ## 0: a non-empty |alt| exists for this URL.
4885     }
4886     } else {
4887     ## NOTE: Empty |alt=""|. If there is another |area| element
4888     ## with the same |href=""| and that |area| element's
4889     ## |alt=""| attribute is not an empty string, then this
4890     ## is conforming.
4891     for (@{$self->{flag}->{in_map} or []}) {
4892     push @{$_->{$url} ||= []}, $attr{alt}
4893     unless exists $_->{$url} and not $_->{$url};
4894     }
4895     }
4896     } else { ## |alt| is required when |href| is specified.
4897 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4898 wakaba 1.104 type => 'attribute missing',
4899     text => 'alt',
4900     level => $self->{level}->{must});
4901 wakaba 1.1 }
4902     } else {
4903     for (qw/target ping rel media hreflang type alt/) { ## These attributes are reported as not allowed without |href|.
4904     if (defined $attr{$_}) {
4905     $self->{onerror}->(node => $attr{$_},
4906 wakaba 1.104 type => 'attribute not allowed',
4907     level => $self->{level}->{must});
4908 wakaba 1.1 }
4909     }
4910     }
4911    
4912     my $shape = 'rectangle'; ## Used when |shape| is absent or its value is not recognized.
4913     if (defined $attr{shape}) {
4914     $shape = {
4915     circ => 'circle', circle => 'circle',
4916     default => 'default',
4917     poly => 'polygon', polygon => 'polygon',
4918     rect => 'rectangle', rectangle => 'rectangle',
4919     }->{lc $attr{shape}->value} || 'rectangle';
4920     ## TODO: ASCII lowercase?
4921     }
4922    
4923     if ($shape eq 'circle') { ## Exactly three values; the third must not be negative.
4924     if (defined $attr{coords}) {
4925     if (defined $coords) {
4926     if (@$coords == 3) {
4927     if ($coords->[2] < 0) {
4928     $self->{onerror}->(node => $attr{coords},
4929 wakaba 1.104 type => 'coords:out of range',
4930     index => 2,
4931     value => $coords->[2],
4932     level => $self->{level}->{must});
4933 wakaba 1.1 }
4934     } else {
4935     $self->{onerror}->(node => $attr{coords},
4936 wakaba 1.104 type => 'coords:number not 3',
4937     text => 0+@$coords,
4938     level => $self->{level}->{must});
4939 wakaba 1.1 }
4940     } else {
4941     ## NOTE: A syntax error has been reported.
4942     }
4943     } else {
4944 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4945 wakaba 1.104 type => 'attribute missing',
4946     text => 'coords',
4947     level => $self->{level}->{must});
4948 wakaba 1.1 }
4949     } elsif ($shape eq 'default') { ## |coords| is reported as not allowed.
4950     if (defined $attr{coords}) {
4951     $self->{onerror}->(node => $attr{coords},
4952 wakaba 1.104 type => 'attribute not allowed',
4953     level => $self->{level}->{must});
4954 wakaba 1.1 }
4955     } elsif ($shape eq 'polygon') { ## At least six values, and an even number of them.
4956     if (defined $attr{coords}) {
4957     if (defined $coords) {
4958     if (@$coords >= 6) {
4959     unless (@$coords % 2 == 0) {
4960     $self->{onerror}->(node => $attr{coords},
4961 wakaba 1.104 type => 'coords:number not even',
4962     text => 0+@$coords,
4963     level => $self->{level}->{must});
4964 wakaba 1.1 }
4965     } else {
4966     $self->{onerror}->(node => $attr{coords},
4967 wakaba 1.104 type => 'coords:number lt 6',
4968     text => 0+@$coords,
4969     level => $self->{level}->{must});
4970 wakaba 1.1 }
4971     } else {
4972     ## NOTE: A syntax error has been reported.
4973     }
4974     } else {
4975 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4976 wakaba 1.104 type => 'attribute missing',
4977     text => 'coords',
4978     level => $self->{level}->{must});
4979 wakaba 1.1 }
4980     } elsif ($shape eq 'rectangle') { ## Exactly four values; first < third and second < fourth.
4981     if (defined $attr{coords}) {
4982     if (defined $coords) {
4983     if (@$coords == 4) {
4984     unless ($coords->[0] < $coords->[2]) {
4985     $self->{onerror}->(node => $attr{coords},
4986 wakaba 1.104 type => 'coords:out of range',
4987     index => 0,
4988     value => $coords->[0],
4989     level => $self->{level}->{must});
4990 wakaba 1.1 }
4991     unless ($coords->[1] < $coords->[3]) {
4992     $self->{onerror}->(node => $attr{coords},
4993 wakaba 1.104 type => 'coords:out of range',
4994     index => 1,
4995     value => $coords->[1],
4996     level => $self->{level}->{must});
4997 wakaba 1.1 }
4998     } else {
4999     $self->{onerror}->(node => $attr{coords},
5000 wakaba 1.104 type => 'coords:number not 4',
5001     text => 0+@$coords,
5002     level => $self->{level}->{must});
5003 wakaba 1.1 }
5004     } else {
5005     ## NOTE: A syntax error has been reported.
5006     }
5007     } else {
5008 wakaba 1.40 $self->{onerror}->(node => $item->{node},
5009 wakaba 1.104 type => 'attribute missing',
5010     text => 'coords',
5011     level => $self->{level}->{must});
5012 wakaba 1.1 }
5013     }
5014 wakaba 1.66
5015     $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
5016 wakaba 1.1 },
5017 wakaba 1.59 check_start => sub {
5018     my ($self, $item, $element_state) = @_;
5019     unless ($self->{flag}->{in_map} or ## No error if the |in_map| flag is set, or if the element has no parent element.
5020     not $item->{node}->manakai_parent_element) {
5021     $self->{onerror}->(node => $item->{node},
5022     type => 'element not allowed:area',
5023 wakaba 1.104 level => $self->{level}->{must});
5024 wakaba 1.59 }
5025 wakaba 1.79
5026     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
5027     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
5028 wakaba 1.59 },
5029 wakaba 1.1 };
5030    
5031     $Element->{$HTML_NS}->{table} = { ## Checker definition for the HTML |table| element.
5032 wakaba 1.40 %HTMLChecker,
5033 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
5034 wakaba 1.69 check_attrs => $GetHTMLAttrsChecker->({
5035 wakaba 1.86 cellpadding => $HTMLLengthAttrChecker,
5036     cellspacing => $HTMLLengthAttrChecker,
5037 wakaba 1.69 frame => $GetHTMLEnumeratedAttrChecker->({
5038     void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
5039     lhs => 1, rhs => 1, box => 1, border => 1,
5040     }),
5041     rules => $GetHTMLEnumeratedAttrChecker->({
5042     none => 1, groups => 1, rows => 1, cols => 1, all => 1,
5043     }),
5044     summary => sub {}, ## NOTE: %Text; in HTML4.
5045     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
5046     }, { ## Second hash: per-attribute feature status flags.
5047 wakaba 1.49 %HTMLAttrStatus,
5048 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
5049 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
5050     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
5051     border => FEATURE_M12N10_REC,
5052     cellpadding => FEATURE_M12N10_REC,
5053     cellspacing => FEATURE_M12N10_REC,
5054 wakaba 1.61 cols => FEATURE_RFC1942,
5055 wakaba 1.49 datafld => FEATURE_HTML4_REC_RESERVED,
5056     dataformatas => FEATURE_HTML4_REC_RESERVED,
5057     datapagesize => FEATURE_M12N10_REC,
5058     datasrc => FEATURE_HTML4_REC_RESERVED,
5059     frame => FEATURE_M12N10_REC,
5060 wakaba 1.187 lang => FEATURE_HTML5_REC,
5061 wakaba 1.49 rules => FEATURE_M12N10_REC,
5062     summary => FEATURE_M12N10_REC,
5063     width => FEATURE_M12N10_REC,
5064     }),
5065 wakaba 1.40 check_start => sub {
5066     my ($self, $item, $element_state) = @_;
5067     $element_state->{phase} = 'before caption'; ## Initial state of the child-order state machine in |check_child_element|.
5068 wakaba 1.66
5069     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
5070 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
5071     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
5072 wakaba 1.40 },
5073     check_child_element => sub { ## Phases: 'before caption', 'in colgroup', 'after thead', 'in tbodys', 'in trs', 'after tfoot'.
5074     my ($self, $item, $child_el, $child_nsuri, $child_ln,
5075     $child_is_transparent, $element_state) = @_;
5076 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
5077     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
5078 wakaba 1.40 $self->{onerror}->(node => $child_el,
5079     type => 'element not allowed:minus',
5080 wakaba 1.104 level => $self->{level}->{must});
5081 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
5082     #
5083     } elsif ($element_state->{phase} eq 'in tbodys') { ## Only |tbody|, or a first |tfoot|, is accepted.
5084     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5085     #$element_state->{phase} = 'in tbodys';
5086     } elsif (not $element_state->{has_tfoot} and
5087     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5088     $element_state->{phase} = 'after tfoot';
5089     $element_state->{has_tfoot} = 1;
5090     } else {
5091 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5092     level => $self->{level}->{must});
5093 wakaba 1.40 }
5094     } elsif ($element_state->{phase} eq 'in trs') { ## Only |tr|, or a first |tfoot|, is accepted.
5095     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5096     #$element_state->{phase} = 'in trs';
5097     } elsif (not $element_state->{has_tfoot} and
5098     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5099     $element_state->{phase} = 'after tfoot';
5100     $element_state->{has_tfoot} = 1;
5101     } else {
5102 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5103     level => $self->{level}->{must});
5104 wakaba 1.40 }
5105     } elsif ($element_state->{phase} eq 'after thead') {
5106     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5107     $element_state->{phase} = 'in tbodys';
5108     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5109     $element_state->{phase} = 'in trs';
5110     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5111     $element_state->{phase} = 'in tbodys'; ## |tfoot| before the body: |has_tfoot| makes a later |tfoot| an error.
5112     $element_state->{has_tfoot} = 1;
5113     } else {
5114 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5115     level => $self->{level}->{must});
5116 wakaba 1.40 }
5117     } elsif ($element_state->{phase} eq 'in colgroup') {
5118     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
5119     $element_state->{phase} = 'in colgroup';
5120     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
5121     $element_state->{phase} = 'after thead';
5122     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5123     $element_state->{phase} = 'in tbodys';
5124     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5125     $element_state->{phase} = 'in trs';
5126     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5127     $element_state->{phase} = 'in tbodys'; ## |tfoot| before the body: |has_tfoot| makes a later |tfoot| an error.
5128     $element_state->{has_tfoot} = 1;
5129     } else {
5130 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5131     level => $self->{level}->{must});
5132 wakaba 1.40 }
5133     } elsif ($element_state->{phase} eq 'before caption') {
5134     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
5135 wakaba 1.181 $item->{parent_state}->{table_caption_element} = $child_el; ## Recorded on the parent's state.
5136 wakaba 1.40 $element_state->{phase} = 'in colgroup';
5137     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
5138     $element_state->{phase} = 'in colgroup';
5139     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
5140     $element_state->{phase} = 'after thead';
5141     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5142     $element_state->{phase} = 'in tbodys';
5143     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5144     $element_state->{phase} = 'in trs';
5145     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5146     $element_state->{phase} = 'in tbodys'; ## |tfoot| before the body: |has_tfoot| makes a later |tfoot| an error.
5147     $element_state->{has_tfoot} = 1;
5148     } else {
5149 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5150     level => $self->{level}->{must});
5151 wakaba 1.40 }
5152     } elsif ($element_state->{phase} eq 'after tfoot') { ## No further child element is accepted.
5153 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5154     level => $self->{level}->{must});
5155 wakaba 1.40 } else {
5156     die "check_child_element: Bad |table| phase: $element_state->{phase}";
5157     }
5158     },
5159     check_child_text => sub {
5160     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
5161     if ($has_significant) {
5162 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
5163     level => $self->{level}->{must});
5164 wakaba 1.1 }
5165 wakaba 1.40 },
5166     check_end => sub {
5167     my ($self, $item, $element_state) = @_;
5168 wakaba 1.1
5169     ## Table model errors
5170     require Whatpm::HTMLTable;
5171 wakaba 1.87 my $table = Whatpm::HTMLTable->form_table ($item->{node}, sub {
5172 wakaba 1.104 $self->{onerror}->(@_);
5173     }, $self->{level});
5174 wakaba 1.87 Whatpm::HTMLTable->assign_header
5175 wakaba 1.104 ($table, $self->{onerror}, $self->{level});
5176 wakaba 1.87 push @{$self->{return}->{table}}, $table; ## The formed table is appended to the checker's |{return}->{table}| list.
5177 wakaba 1.1
5178 wakaba 1.40 $HTMLChecker{check_end}->(@_);
5179 wakaba 1.1 },
5180     };
5181    
5182     $Element->{$HTML_NS}->{caption} = { ## Checker definition for the HTML |caption| element.
5183 wakaba 1.169 %HTMLFlowContentChecker,
5184 wakaba 1.187 status => FEATURE_HTML5_REC,
5185 wakaba 1.69 check_attrs => $GetHTMLAttrsChecker->({
5186     align => $GetHTMLEnumeratedAttrChecker->({
5187     top => 1, bottom => 1, left => 1, right => 1,
5188     }),
5189     }, { ## Second hash: per-attribute feature status flags.
5190 wakaba 1.49 %HTMLAttrStatus,
5191 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
5192 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
5193 wakaba 1.187 lang => FEATURE_HTML5_REC,
5194 wakaba 1.49 }),
5195 wakaba 1.169 check_start => sub {
5196     my ($self, $item, $element_state) = @_;
5197     $self->_add_minus_elements ($element_state, {$HTML_NS => {table => 1}}); ## Registers HTML |table| as a "minus" element; undone in |check_end|.
5198    
5199     $HTMLFlowContentChecker{check_start}->(@_);
5200     },
5201     check_end => sub {
5202     my ($self, $item, $element_state) = @_;
5203     $self->_remove_minus_elements ($element_state);
5204    
5205     $HTMLFlowContentChecker{check_end}->(@_);
5206     },
5207     }; # caption
5208 wakaba 1.1
5209 wakaba 1.69 my %cellalign = ( ## Alignment attribute checkers spread into the |colgroup|, |col| and |tbody| checkers.
5210     ## HTML4 %cellhalign;
5211 wakaba 1.70 align => $GetHTMLEnumeratedAttrChecker->({
5212     left => 1, center => 1, right => 1, justify => 1, char => 1,
5213     }),
5214     char => sub {
5215     my ($self, $attr) = @_;
5216 wakaba 1.69
5217 wakaba 1.70 ## NOTE: "character" or |%Character;| in HTML4.
5218    
5219     my $value = $attr->value;
5220     if (length $value != 1) { ## The value must be exactly one character long.
5221     $self->{onerror}->(node => $attr, type => 'char:syntax error',
5222 wakaba 1.105 level => $self->{level}->{html4_fact});
5223 wakaba 1.70 }
5224     },
5225 wakaba 1.86 charoff => $HTMLLengthAttrChecker,
5226    
5227 wakaba 1.69 ## HTML4 %cellvalign;
5228 wakaba 1.70 valign => $GetHTMLEnumeratedAttrChecker->({
5229     top => 1, middle => 1, bottom => 1, baseline => 1,
5230     }),
5231 wakaba 1.69 );
5232    
5233 wakaba 1.1 $Element->{$HTML_NS}->{colgroup} = { ## Checker definition for the HTML |colgroup| element.
5234 wakaba 1.40 %HTMLEmptyChecker,
5235 wakaba 1.187 status => FEATURE_HTML5_REC,
5236 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
5237 wakaba 1.69 %cellalign,
5238 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), ## The callback accepts only values greater than zero.
5239     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
5240     ## TODO: "attribute not supported" if |col|.
5241     ## ISSUE: MUST NOT if any |col|?
5242     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
5243 wakaba 1.49 }, { ## Second hash: per-attribute feature status flags.
5244     %HTMLAttrStatus,
5245 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
5246 wakaba 1.49 align => FEATURE_M12N10_REC,
5247     char => FEATURE_M12N10_REC,
5248     charoff => FEATURE_M12N10_REC,
5249 wakaba 1.187 lang => FEATURE_HTML5_REC,
5250 wakaba 1.153 span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
5251 wakaba 1.49 valign => FEATURE_M12N10_REC,
5252     width => FEATURE_M12N10_REC,
5253 wakaba 1.1 }),
5254 wakaba 1.40 check_child_element => sub { ## Overrides the handler inherited from |%HTMLEmptyChecker|.
5255     my ($self, $item, $child_el, $child_nsuri, $child_ln,
5256     $child_is_transparent, $element_state) = @_;
5257 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
5258     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
5259 wakaba 1.40 $self->{onerror}->(node => $child_el,
5260     type => 'element not allowed:minus',
5261 wakaba 1.104 level => $self->{level}->{must});
5262 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
5263     #
5264     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') { ## HTML |col| is the only child accepted here.
5265     #
5266     } else {
5267 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5268     level => $self->{level}->{must});
5269 wakaba 1.40 }
5270     },
5271     check_child_text => sub {
5272     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
5273     if ($has_significant) {
5274 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
5275     level => $self->{level}->{must});
5276 wakaba 1.1 }
5277     },
5278     };
5279    
## |col| element definition: the shared %HTMLEmptyChecker entries plus a
## status flag and an attribute checker built by $GetHTMLAttrsChecker.
$Element->{$HTML_NS}{col} = do {
  ## Attribute name => value checker.  The predicate handed to the
  ## integer checker factory accepts only values greater than zero.
  my %attr_checker = (
    %cellalign,
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  );

  ## Attribute name => feature status bits.  Explicit entries come after
  ## the shared tables so that they take precedence on key collisions.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) }
         qw(align char charoff valign width)),
    lang => FEATURE_HTML5_REC,
    span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  );

  +{
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
};
5298    
## |tbody| element definition.  Child elements must be HTML |tr| (or be
## listed in |plus_elements|); significant text children are reported.
## NOTE: |thead| and |tfoot| copy this definition.
$Element->{$HTML_NS}{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    +{ %cellalign },
    +{
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_child_element => sub {
    my ($self, undef, $child, $ns, $ln) = @_;

    if ($self->{minus_elements}{$ns}{$ln} &&
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Listed in |minus_elements| and $IsInHTMLInteractiveContent
      ## returned true.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif (not ($self->{plus_elements}{$ns}{$ln} or
                  ($ns eq $HTML_NS and $ln eq 'tr'))) {
      ## Neither listed in |plus_elements| nor an HTML |tr|.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed',
                         level => $self->{level}{must});
    }
  },
  check_child_text => sub {
    my ($self, undef, $text_node, $has_significant) = @_;

    $self->{onerror}->(node => $text_node,
                       type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  },
};
5338    
## |thead| element definition: a new hash holding the same top-level
## entries as the |tbody| definition.
$Element->{$HTML_NS}{thead} = +{ %{ $Element->{$HTML_NS}{tbody} } };
5342    
## |tfoot| element definition: a new hash holding the same top-level
## entries as the |tbody| definition.
$Element->{$HTML_NS}{tfoot} = +{ %{ $Element->{$HTML_NS}{tbody} } };
5346    
## |tr| element definition.  Child elements must be HTML |td| or |th|
## (or be listed in |plus_elements|); significant text children are
## reported.
$Element->{$HTML_NS}{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    +{
      %cellalign,
      bgcolor => $HTMLColorAttrChecker,
    },
    +{
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_child_element => sub {
    my ($self, undef, $child, $ns, $ln) = @_;

    if ($self->{minus_elements}{$ns}{$ln} &&
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Listed in |minus_elements| and $IsInHTMLInteractiveContent
      ## returned true.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif (not ($self->{plus_elements}{$ns}{$ln} or
                  ($ns eq $HTML_NS and
                   ($ln eq 'td' or $ln eq 'th')))) {
      ## Neither listed in |plus_elements| nor an HTML table cell.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed',
                         level => $self->{level}{must});
    }
  },
  check_child_text => sub {
    my ($self, undef, $text_node, $has_significant) = @_;

    $self->{onerror}->(node => $text_node,
                       type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  },
};
5389    
## |td| element definition: the shared %HTMLFlowContentChecker entries
## plus a status flag and an attribute checker.
$Element->{$HTML_NS}{td} = do {
  ## Attribute name => value checker.
  my %attr_checker = (
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    ## |colspan| and |rowspan|: the predicate accepts only values
    ## greater than zero.
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    headers => sub {
      ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
      ## Though that method does not check the |headers| attribute of a
      ## |td| element if the element does not form a table, in that case
      ## the |td| element is non-conforming anyway.
    },
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->(
      {row => 1, col => 1, rowgroup => 1, colgroup => 1},
    ),
  );

  ## Attribute name => feature status bits, grouped by status value.
  ## Explicit entries follow the shared tables so that they win on key
  ## collisions.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) }
         qw(align char charoff valign)),
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw(bgcolor height nowrap width)),
    (map { ($_ => FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw(abbr axis scope)),
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
                  FEATURE_M12N10_REC) }
         qw(colspan headers rowspan)),
    lang => FEATURE_HTML5_REC,
  );

  +{
    %HTMLFlowContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
};
5429    
## |th| element definition: the shared %HTMLPhrasingContentChecker
## entries plus a status flag and an attribute checker.
$Element->{$HTML_NS}{th} = do {
  ## Attribute name => value checker.
  my %attr_checker = (
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    ## |colspan| and |rowspan|: the predicate accepts only values
    ## greater than zero.
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    ## TODO: HTML4(?) |headers|
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->(
      {row => 1, col => 1, rowgroup => 1, colgroup => 1},
    ),
  );

  ## Attribute name => feature status bits, grouped by status value.
  ## Explicit entries follow the shared tables so that they win on key
  ## collisions.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) }
         qw(align char charoff valign)),
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw(bgcolor height nowrap width)),
    (map { ($_ => FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw(abbr axis)),
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
                  FEATURE_M12N10_REC) }
         qw(colspan headers rowspan)),
    (map { ($_ => FEATURE_HTML5_REC) }
         qw(lang scope)),
  );

  +{
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
};
5464    
## |form| element definition: the shared %HTMLFlowContentChecker entries
## with form-specific attribute checks and start/end hooks.
$Element->{$HTML_NS}{form} = do {
  ## Attribute name => value checker.
  my %attr_checker = (
    accept => $AcceptAttrChecker,
    'accept-charset' => $HTMLCharsetsAttrChecker,
    ## TODO: Warn if submission is not defined for the scheme
    action => $HTMLURIAttrChecker,
    autocomplete => $GetHTMLEnumeratedAttrChecker->({on => 1, off => 1}),
    ## TODO: MUST point ... [WF2]
    data => $HTMLURIAttrChecker,
    enctype => $GetHTMLEnumeratedAttrChecker->({
      'application/x-www-form-urlencoded' => 1,
      'multipart/form-data' => 1,
      'text/plain' => 1,
    }),
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1, post => 1, put => 1, delete => 1,
    }),
    ## |name|: must be non-empty and must not repeat a name already
    ## recorded in |$self->{form}|.
    name => sub {
      my ($self, $attr) = @_;
      my $form_name = $attr->value;

      if ($form_name eq '') {
        $self->{onerror}->(type => 'empty form name',
                           node => $attr,
                           level => $self->{level}{must});
      } elsif ($self->{form}{$form_name}) {
        $self->{onerror}->(type => 'duplicate form name',
                           node => $attr,
                           value => $form_name,
                           level => $self->{level}{must});
      } else {
        ## First occurrence: remember it for later duplicate detection.
        $self->{form}{$form_name} = 1;
      }
    },
    novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
    ## TODO: Tests for following attrs:
    onformchange => $HTMLEventHandlerAttrChecker,
    onforminput => $HTMLEventHandlerAttrChecker,
    onreceived => $HTMLEventHandlerAttrChecker,
    replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
    target => $HTMLTargetAttrChecker,
  );

  ## Attribute name => feature status bits, grouped by status value.
  ## Explicit entries follow the shared tables so that they win on key
  ## collisions.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_WF2X |
                  FEATURE_M12N10_REC) }
         qw(action enctype method)),
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw(onreset onsubmit target)),
    #name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
    (map { ($_ => FEATURE_HTML5_WD | FEATURE_M12N10_REC) }
         qw(accept-charset name)),
    autocomplete => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_REC,
    novalidate => FEATURE_HTML5_DEFAULT,
    (map { ($_ => FEATURE_WF2) }
         qw(data onreceived replace)),
    (map { ($_ => FEATURE_WF2_INFORMATIVE) }
         qw(onformchange onforminput)),
    (map { ($_ => FEATURE_HTML20_RFC) }
         qw(sdapref sdasuff)),
  );

  +{
    %HTMLFlowContentChecker,
    status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($self, undef, $element_state) = @_;

      ## Register |form| itself as a minus element for this element's
      ## state; the matching removal happens in |check_end|.
      $self->_add_minus_elements ($element_state, {$HTML_NS => {form => 1}});

      ## Record URI type information for URI-valued attributes.
      my $uri_info = $element_state->{uri_info} ||= {};
      $uri_info->{action}{type}{action} = 1;
      $uri_info->{$_}{type}{resource} = 1 for qw(data template ref);

      $element_state->{id_type} = 'form';
    },
    check_end => sub {
      my ($self, undef, $element_state) = @_;

      $self->_remove_minus_elements ($element_state);

      ## Delegate to the shared flow-content end hook with the original
      ## argument list.
      $HTMLFlowContentChecker{check_end}->(@_);
    },
  };
}; # form
5551 wakaba 1.52
## |fieldset| element definition.  Content model: legend, Flow.
## |has_legend| and |has_non_legend| in the element state track whether
## a |legend| child has been seen and whether any other content (or a
## previous |legend|) preceded the current child.
## NOTE: This definition is partially reused by |details| element's
## checker.
$Element->{$HTML_NS}{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    +{
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      form => $HTMLFormAttrChecker,
      name => $FormControlNameAttrChecker,
    },
    +{
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_HTML5_WD | FEATURE_WF2X,
      form => FEATURE_HTML5_LC | FEATURE_WF2X,
      lang => FEATURE_HTML5_REC,
      name => FEATURE_HTML5_LC,
    },
  ),
  check_child_element => sub {
    ## @_ is kept intact because it is forwarded to the flow-content
    ## checker below.
    my ($self, undef, $child, $ns, $ln, $transparent, $state) = @_;

    if ($self->{minus_elements}{$ns}{$ln} &&
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
      $state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}{$ns}{$ln}) {
      ## Listed in |plus_elements|: nothing to report.
    } elsif ($ns eq $HTML_NS && $ln eq 'legend') {
      ## A |legend| is reported when something else has already been
      ## seen, including an earlier |legend|.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:details legend',
                         level => $self->{level}{must})
          if $state->{has_non_legend};
      $state->{has_legend} = 1;
      $state->{has_non_legend} = 1;
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      if (not $transparent) {
        $state->{has_non_legend} = 1;
      }
      ## TODO:
      ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
      ## should be an error, since |object| is allowed as flow,
      ## therefore |details| part of the content model does not match.
    }
  },
  check_child_text => sub {
    my (undef, undef, undef, $has_significant, $state) = @_;

    $state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;

    ## A |legend| child is required.
    if (not $state->{has_legend}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'legend',
                         level => $self->{level}{must});
    }

    ## Delegate to the shared flow-content end hook with the original
    ## argument list.
    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
};
5618    
5619     $Element->{$HTML_NS}->{input} = {
5620 wakaba 1.119 %HTMLEmptyChecker,
5621 wakaba 1.187 status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
5622 wakaba 1.140 check_attrs => sub {
5623     my ($self, $item, $element_state) = @_;
5624 wakaba 1.142
5625 wakaba 1.145 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5626 wakaba 1.142 $state = 'text' unless defined $state;
5627     $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5628    
5629 wakaba 1.140 for my $attr (@{$item->{node}->attributes}) {
5630     my $attr_ns = $attr->namespace_uri;
5631     $attr_ns = '' unless defined $attr_ns;
5632     my $attr_ln = $attr->manakai_local_name;
5633     my $checker;
5634     my $status;
5635     if ($attr_ns eq '') {
5636     $status =
5637     {
5638     %HTMLAttrStatus,
5639     %HTMLM12NCommonAttrStatus,
5640     accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5641     'accept-charset' => FEATURE_HTML2X_RFC,
5642 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
5643 wakaba 1.140 action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5644     align => FEATURE_M12N10_REC_DEPRECATED,
5645     alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5646 wakaba 1.185 autocomplete => FEATURE_HTML5_LC | FEATURE_WF2X,
5647 wakaba 1.187 autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
5648     checked => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
5649 wakaba 1.140 datafld => FEATURE_HTML4_REC_RESERVED,
5650     dataformatas => FEATURE_HTML4_REC_RESERVED,
5651     datasrc => FEATURE_HTML4_REC_RESERVED,
5652 wakaba 1.187 disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5653 wakaba 1.140 enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5654 wakaba 1.187 form => FEATURE_HTML5_LC | FEATURE_WF2X,
5655 wakaba 1.178 height => FEATURE_HTML5_LC,
5656 wakaba 1.150 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5657     FEATURE_XHTMLBASIC11_CR,
5658 wakaba 1.140 ismap => FEATURE_M12N10_REC,
5659 wakaba 1.187 lang => FEATURE_HTML5_REC,
5660     list => FEATURE_HTML5_LC | FEATURE_WF2X,
5661     max => FEATURE_HTML5_LC | FEATURE_WF2X,
5662     maxlength => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5663 wakaba 1.140 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5664 wakaba 1.187 min => FEATURE_HTML5_LC | FEATURE_WF2X,
5665     multiple => FEATURE_HTML5_LC,
5666     name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
5667 wakaba 1.161 novalidate => FEATURE_HTML5_DEFAULT,
5668 wakaba 1.140 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5669     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5670     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5671     onformchange => FEATURE_WF2_INFORMATIVE,
5672     onforminput => FEATURE_WF2_INFORMATIVE,
5673     oninput => FEATURE_WF2,
5674     oninvalid => FEATURE_WF2,
5675     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5676 wakaba 1.187 pattern => FEATURE_HTML5_LC | FEATURE_WF2X,
5677     placeholder => FEATURE_HTML5_LC,
5678     readonly => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5679 wakaba 1.140 replace => FEATURE_WF2,
5680 wakaba 1.187 required => FEATURE_HTML5_LC | FEATURE_WF2X,
5681 wakaba 1.140 sdapref => FEATURE_HTML20_RFC,
5682 wakaba 1.187 size => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5683 wakaba 1.140 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5684 wakaba 1.187 step => FEATURE_HTML5_LC | FEATURE_WF2X,
5685 wakaba 1.140 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5686     target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5687 wakaba 1.161 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO:dropped
5688 wakaba 1.187 type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
5689 wakaba 1.140 usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
5690 wakaba 1.187 value => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
5691 wakaba 1.178 width => FEATURE_HTML5_LC,
5692 wakaba 1.140 }->{$attr_ln};
5693    
5694     $checker =
5695     {
5696 wakaba 1.141 ## NOTE: Value of an empty string means that the attribute is only
5697     ## applicable for a specific set of states.
5698 wakaba 1.142 accept => '',
5699 wakaba 1.149 'accept-charset' => $HTMLCharsetsAttrChecker,
5700     ## NOTE: To which states it applies is not defined in RFC 2070.
5701 wakaba 1.142 action => '',
5702 wakaba 1.150 align => '',
5703 wakaba 1.141 alt => '',
5704 wakaba 1.142 autocomplete => '',
5705 wakaba 1.165 autofocus => $AutofocusAttrChecker,
5706     ## NOTE: <input type=hidden disabled> is not disallowed.
5707 wakaba 1.142 checked => '',
5708     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
5709 wakaba 1.165 ## NOTE: <input type=hidden disabled> is not disallowed.
5710 wakaba 1.142 enctype => '',
5711     form => $HTMLFormAttrChecker,
5712 wakaba 1.178 height => '',
5713 wakaba 1.150 inputmode => '',
5714     ismap => '', ## NOTE: "MUST" be type=image [HTML4]
5715 wakaba 1.142 list => '',
5716     max => '',
5717     maxlength => '',
5718     method => '',
5719     min => '',
5720 wakaba 1.156 multiple => '',
5721 wakaba 1.165 name => $FormControlNameAttrChecker,
5722 wakaba 1.166 novalidate => '',
5723 wakaba 1.149 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
5724     onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
5725     oninput => $HTMLEventHandlerAttrChecker, # [WF2]
5726     oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
5727     ## TODO: tests for four attributes above
5728 wakaba 1.142 pattern => '',
5729 wakaba 1.156 placeholder => '',
5730 wakaba 1.142 readonly => '',
5731 wakaba 1.150 replace => '',
5732 wakaba 1.142 required => '',
5733     size => '',
5734     src => '',
5735     step => '',
5736     target => '',
5737 wakaba 1.140 type => $GetHTMLEnumeratedAttrChecker->({
5738 wakaba 1.156 hidden => 1, text => 1, search => 1, url => 1,
5739     email => 1, password => 1,
5740 wakaba 1.141 datetime => 1, date => 1, month => 1, week => 1, time => 1,
5741 wakaba 1.157 'datetime-local' => 1, number => 1, range => 1, color => 1,
5742     checkbox => 1,
5743 wakaba 1.141 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
5744     button => 1,
5745 wakaba 1.140 }),
5746 wakaba 1.151 usemap => '',
5747 wakaba 1.142 value => '',
5748 wakaba 1.178 width => '',
5749 wakaba 1.140 }->{$attr_ln};
5750 wakaba 1.141
5751     ## State-dependent checkers
5752     unless ($checker) {
5753     if ($state eq 'hidden') {
5754     $checker =
5755     {
5756 wakaba 1.142 value => sub {
5757     my ($self, $attr, $item, $element_state) = @_;
5758 wakaba 1.145 my $name = $item->{node}->get_attribute_ns (undef, 'name');
5759 wakaba 1.142 if (defined $name and $name eq '_charset_') { ## case-sensitive
5760     $self->{onerror}->(node => $attr,
5761     type => '_charset_ value',
5762     level => $self->{level}->{must});
5763     }
5764     },
5765 wakaba 1.141 }->{$attr_ln} || $checker;
5766 wakaba 1.142 ## TODO: Warn if no name attribute?
5767     ## TODO: Warn if name!=_charset_ and no value attribute?
5768 wakaba 1.168 } elsif ({
5769     datetime => 1, date => 1, month => 1, time => 1,
5770     week => 1, 'datetime-local' => 1,
5771     }->{$state}) {
5772     my $v = {
5773     datetime => ['global_date_and_time_string'],
5774     date => ['date_string'],
5775     month => ['month_string'],
5776     week => ['week_string'],
5777     time => ['time_string'],
5778     'datetime-local' => ['local_date_and_time_string'],
5779     }->{$state};
5780 wakaba 1.144 $checker =
5781     {
5782 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5783     on => 1, off => 1,
5784     }),
5785 wakaba 1.158 list => $ListAttrChecker,
5786 wakaba 1.168 min => $GetDateTimeAttrChecker->($v->[0]),
5787     max => $GetDateTimeAttrChecker->($v->[0]),
5788 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5789 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5790 wakaba 1.148 step => $StepAttrChecker,
5791 wakaba 1.168 value => $GetDateTimeAttrChecker->($v->[0]),
5792 wakaba 1.144 }->{$attr_ln} || $checker;
5793     } elsif ($state eq 'number') {
5794     $checker =
5795     {
5796 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5797     on => 1, off => 1,
5798     }),
5799 wakaba 1.158 list => $ListAttrChecker,
5800 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5801     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5802 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5803 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5804 wakaba 1.148 step => $StepAttrChecker,
5805 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5806 wakaba 1.144 }->{$attr_ln} || $checker;
5807     } elsif ($state eq 'range') {
5808     $checker =
5809     {
5810 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5811     on => 1, off => 1,
5812     }),
5813 wakaba 1.158 list => $ListAttrChecker,
5814 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5815     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5816 wakaba 1.148 step => $StepAttrChecker,
5817 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5818 wakaba 1.144 }->{$attr_ln} || $checker;
5819 wakaba 1.157 } elsif ($state eq 'color') {
5820     $checker =
5821     {
5822     autocomplete => $GetHTMLEnumeratedAttrChecker->({
5823     on => 1, off => 1,
5824     }),
5825 wakaba 1.158 list => $ListAttrChecker,
5826 wakaba 1.157 value => sub {
5827     my ($self, $attr) = @_;
5828     unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
5829     $self->{onerror}->(node => $attr,
5830     type => 'scolor:syntax error', ## TODOC: type
5831     level => $self->{level}->{must});
5832     }
5833     },
5834     }->{$attr_ln} || $checker;
5835 wakaba 1.144 } elsif ($state eq 'checkbox' or $state eq 'radio') {
5836     $checker =
5837     {
5838 wakaba 1.149 checked => $GetHTMLBooleanAttrChecker->('checked'),
5839     ## TODO: tests
5840 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5841 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5842     }->{$attr_ln} || $checker;
5843     ## TODO: There MUST be another input type=radio with same
5844     ## name (Radio state).
5845     ## ISSUE: There should be exactly one type=radio with checked?
5846     } elsif ($state eq 'file') {
5847     $checker =
5848     {
5849 wakaba 1.161 accept => $AcceptAttrChecker,
5850 wakaba 1.168 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
5851 wakaba 1.159 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
5852 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5853 wakaba 1.144 }->{$attr_ln} || $checker;
5854     } elsif ($state eq 'submit') {
5855     $checker =
5856     {
5857 wakaba 1.149 action => $HTMLURIAttrChecker,
5858 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5859     'application/x-www-form-urlencoded' => 1,
5860     'multipart/form-data' => 1,
5861     'text/plain' => 1,
5862     }),
5863 wakaba 1.149 method => $GetHTMLEnumeratedAttrChecker->({
5864     get => 1, post => 1, put => 1, delete => 1,
5865     }),
5866 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5867 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5868     document => 1, values => 1,
5869     }),
5870     target => $HTMLTargetAttrChecker,
5871 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5872     }->{$attr_ln} || $checker;
5873     } elsif ($state eq 'image') {
5874     $checker =
5875     {
5876 wakaba 1.149 action => $HTMLURIAttrChecker,
5877     align => $GetHTMLEnumeratedAttrChecker->({
5878     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
5879     }),
5880 wakaba 1.144 alt => sub {
5881     my ($self, $attr) = @_;
5882     my $value = $attr->value;
5883     unless (length $value) {
5884     $self->{onerror}->(node => $attr,
5885     type => 'empty anchor image alt',
5886     level => $self->{level}->{must});
5887     }
5888     },
5889 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5890     'application/x-www-form-urlencoded' => 1,
5891     'multipart/form-data' => 1,
5892     'text/plain' => 1,
5893     }),
5894 wakaba 1.178 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
5895 wakaba 1.149 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
5896     method => $GetHTMLEnumeratedAttrChecker->({
5897     get => 1, post => 1, put => 1, delete => 1,
5898     }),
5899 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5900 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5901     document => 1, values => 1,
5902     }),
5903 wakaba 1.144 src => $HTMLURIAttrChecker,
5904     ## TODO: There is requirements on the referenced resource.
5905 wakaba 1.149 target => $HTMLTargetAttrChecker,
5906     usemap => $HTMLUsemapAttrChecker,
5907 wakaba 1.178 width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
5908 wakaba 1.144 }->{$attr_ln} || $checker;
5909     ## TODO: alt & src are required.
5910     } elsif ({
5911     reset => 1, button => 1,
5912     ## NOTE: From Web Forms 2.0:
5913     remove => 1, 'move-up' => 1, 'move-down' => 1,
5914     add => 1,
5915     }->{$state}) {
5916     $checker =
5917     {
5918     ## NOTE: According to Web Forms 2.0, |input| attribute
5919     ## has |template| attribute to support the |add| button
5920     ## type (as part of the repetition template feature). It
5921     ## conflicts with the |template| global attribute
5922     ## introduced as part of the data template feature.
5923     ## NOTE: |template| attribute as defined in Web Forms 2.0
5924     ## has no author requirement.
5925     value => sub { }, ## NOTE: No restriction.
5926     }->{$attr_ln} || $checker;
5927 wakaba 1.156 } else { # Text, Search, E-mail, URL, Password
5928 wakaba 1.141 $checker =
5929     {
5930 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5931     on => 1, off => 1,
5932     }),
5933 wakaba 1.149 ## TODO: inputmode [WF2]
5934 wakaba 1.158 list => $ListAttrChecker,
5935 wakaba 1.147 maxlength => sub {
5936     my ($self, $attr, $item, $element_state) = @_;
5937    
5938     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
5939    
5940 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
5941 wakaba 1.147 ## NOTE: Applying the rules for parsing non-negative
5942     ## integers results in a number.
5943     my $max_allowed_value_length = 0+$1;
5944    
5945     my $value = $item->{node}->get_attribute_ns (undef, 'value');
5946     if (defined $value) {
5947     my $codepoint_length = length $value;
5948 wakaba 1.162
5949 wakaba 1.147 if ($codepoint_length > $max_allowed_value_length) {
5950     $self->{onerror}
5951     ->(node => $item->{node}
5952     ->get_attribute_node_ns (undef, 'value'),
5953     type => 'value too long',
5954     level => $self->{level}->{must});
5955     }
5956     }
5957     }
5958     },
5959 wakaba 1.160 pattern => $PatternAttrChecker,
5960 wakaba 1.179 placeholder => $PlaceholderAttrChecker,
5961 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5962 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5963 wakaba 1.147 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
5964 wakaba 1.143 value => sub {
5965 wakaba 1.156 my ($self, $attr, $item, $element_state) = @_;
5966     if ($state eq 'url') {
5967     $HTMLURIAttrChecker->(@_);
5968     } elsif ($state eq 'email') {
5969     if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
5970     my @addr = split /,/, $attr->value, -1;
5971     @addr = ('') unless @addr;
5972     for (@addr) {
5973 wakaba 1.165 s/\A[\x09\x0A\x0C\x0D\x20]+//;
5974     s/[\x09\x0A\x0C\x0D\x20]\z//;
5975 wakaba 1.156 unless (/\A$ValidEmailAddress\z/) {
5976     $self->{onerror}->(node => $attr,
5977     type => 'email:syntax error', ## TODO: type
5978     value => $_,
5979     level => $self->{level}->{must});
5980     }
5981     }
5982     } else {
5983     unless ($attr->value =~ /\A$ValidEmailAddress\z/) {
5984     $self->{onerror}->(node => $attr,
5985     type => 'email:syntax error', ## TODO: type
5986     level => $self->{level}->{must});
5987     }
5988     }
5989     } else {
5990     if ($attr->value =~ /[\x0D\x0A]/) {
5991     $self->{onerror}->(node => $attr,
5992     type => 'newline in value', ## TODO: type
5993     level => $self->{level}->{must});
5994     }
5995     }
5996 wakaba 1.143 },
5997 wakaba 1.141 }->{$attr_ln} || $checker;
5998 wakaba 1.147 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
5999 wakaba 1.156 $checker = $GetHTMLBooleanAttrChecker->('multiple')
6000     if $state eq 'email' and $attr_ln eq 'multiple';
6001 wakaba 1.161
6002     if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6003     not $item->{node}->has_attribute_ns (undef, 'title')) {
6004     $self->{onerror}->(node => $item->{node},
6005     type => 'attribute missing',
6006     text => 'title',
6007     level => $self->{level}->{should});
6008     }
6009 wakaba 1.141 }
6010     }
6011    
6012     if (defined $checker) {
6013     if ($checker eq '') {
6014     $checker = sub {
6015     my ($self, $attr) = @_;
6016     $self->{onerror}->(node => $attr,
6017     type => 'input attr not applicable',
6018     text => $state,
6019     level => $self->{level}->{must});
6020     };
6021     }
6022 wakaba 1.140 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
6023     $attr_ln !~ /[A-Z]/) {
6024     $checker = $HTMLDatasetAttrChecker;
6025     $status = $HTMLDatasetAttrStatus;
6026     } else {
6027     $checker = $HTMLAttrChecker->{$attr_ln};
6028     }
6029     }
6030     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
6031     || $AttrChecker->{$attr_ns}->{''};
6032     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
6033     || $AttrStatus->{$attr_ns}->{''};
6034     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
6035 wakaba 1.157
6036 wakaba 1.140 if ($checker) {
6037     $checker->($self, $attr, $item, $element_state) if ref $checker;
6038     } elsif ($attr_ns eq '' and not $status) {
6039     #
6040     } else {
6041     $self->{onerror}->(node => $attr,
6042     type => 'unknown attribute',
6043     level => $self->{level}->{uncertain});
6044     ## ISSUE: No comformance createria for unknown attributes in the spec
6045     }
6046    
6047     $self->_attr_status_info ($attr, $status);
6048     }
6049 wakaba 1.168
6050     ## ISSUE: -0/+0
6051    
6052     if ($state eq 'range') {
6053     $element_state->{number_value}->{min} ||= 0;
6054     $element_state->{number_value}->{max} = 100
6055     unless defined $element_state->{number_value}->{max};
6056     }
6057    
6058     if (defined $element_state->{date_value}->{min} or
6059     defined $element_state->{date_value}->{max}) {
6060     my $min_value = $element_state->{date_value}->{min};
6061     my $max_value = $element_state->{date_value}->{max};
6062     my $value_value = $element_state->{date_value}->{value};
6063    
6064     if (defined $min_value and $min_value eq '' and
6065     (defined $max_value or defined $value_value)) {
6066     my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6067     $self->{onerror}->(node => $min,
6068     type => 'date value not supported', ## TODOC: type
6069     value => $min->value,
6070     level => $self->{level}->{unsupported});
6071     undef $min_value;
6072     }
## An empty-string |max| is treated here as a date value that cannot
## be compared (NOTE(review): presumably set by the |max| attribute
## checker when the value is out of the supported range -- confirm).
## Report it only when there is something to compare it with, i.e. a
## |min| or a |value|, in parallel with the checks for |min| and
## |value|.  (The condition used to test |defined $max_value|, which
## is always true at this point, so the error was raised even when
## there was nothing to compare against.)
if (defined $max_value and $max_value eq '' and
    (defined $min_value or defined $value_value)) {
  my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
  $self->{onerror}->(node => $max,
                     type => 'date value not supported', ## TODOC: type
                     value => $max->value,
                     level => $self->{level}->{unsupported});
  ## Exclude |max| from the range comparisons that follow.
  undef $max_value;
}
6082     if (defined $value_value and $value_value eq '' and
6083     (defined $max_value or defined $min_value)) {
6084     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6085     $self->{onerror}->(node => $value,
6086     type => 'date value not supported', ## TODOC: type
6087     value => $value->value,
6088     level => $self->{level}->{unsupported});
6089     undef $value_value;
6090     }
6091    
6092     if (defined $min_value and defined $max_value) {
6093     if ($min_value->to_html5_number > $max_value->to_html5_number) {
6094     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6095     $self->{onerror}->(node => $max,
6096     type => 'max lt min', ## TODOC: type
6097     level => $self->{level}->{must});
6098     }
6099     }
6100    
6101     if (defined $min_value and defined $value_value) {
6102     if ($min_value->to_html5_number > $value_value->to_html5_number) {
6103     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6104     $self->{onerror}->(node => $value,
6105     type => 'value lt min', ## TODOC: type
6106     level => $self->{level}->{warn});
6107     ## NOTE: Not an error.
6108     }
6109     }
6110    
6111     if (defined $max_value and defined $value_value) {
6112     if ($max_value->to_html5_number < $value_value->to_html5_number) {
6113     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6114     $self->{onerror}->(node => $value,
6115     type => 'value gt max', ## TODOC: type
6116     level => $self->{level}->{warn});
6117     ## NOTE: Not an error.
6118     }
6119     }
6120     } elsif (defined $element_state->{number_value}->{min} or
6121     defined $element_state->{number_value}->{max}) {
6122     my $min_value = $element_state->{number_value}->{min};
6123     my $max_value = $element_state->{number_value}->{max};
6124     my $value_value = $element_state->{number_value}->{value};
6125    
6126     if (defined $min_value and defined $max_value) {
6127     if ($min_value > $max_value) {
6128     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6129     $self->{onerror}->(node => $max,
6130     type => 'max lt min', ## TODOC: type
6131     level => $self->{level}->{must});
6132     }
6133     }
6134    
6135     if (defined $min_value and defined $value_value) {
6136     if ($min_value > $value_value) {
6137     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6138     $self->{onerror}->(node => $value,
6139     type => 'value lt min', ## TODOC: type
6140     level => $self->{level}->{warn});
6141     ## NOTE: Not an error.
6142     }
6143     }
6144    
6145     if (defined $max_value and defined $value_value) {
6146     if ($max_value < $value_value) {
6147     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6148     $self->{onerror}->(node => $value,
6149     type => 'value gt max', ## TODOC: type
6150     level => $self->{level}->{warn});
6151     ## NOTE: Not an error.
6152     }
6153     }
6154     }
6155 wakaba 1.150
6156 wakaba 1.168 ## TODO: Warn unless value = min * x where x is an integer.
6157    
6158 wakaba 1.150 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6159     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6160     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6161     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6162     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6163 wakaba 1.140 },
6164 wakaba 1.66 check_start => sub {
6165     my ($self, $item, $element_state) = @_;
6166 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6167     $self->{onerror}->(node => $item->{node},
6168     type => 'multiple labelable fae',
6169     level => $self->{level}->{must});
6170     } else {
6171     $self->{flag}->{has_labelable} = 2;
6172     }
6173 wakaba 1.138
6174     $element_state->{id_type} = 'labelable';
6175 wakaba 1.66 },
6176 wakaba 1.52 };
6177    
6178 wakaba 1.178 ## XXXresource: Dimension attributes have requirements on width and
6179     ## height of referenced resource.
6180 wakaba 1.80
## |button| element: phrasing content model.  |check_start| registers
## the element as a labelable control and records which attributes
## carry URLs.
$Element->{$HTML_NS}->{button} = {
  %HTMLPhrasingContentChecker, ## ISSUE: -interactive?
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## ISSUE: In HTML5, no "MUST NOT" for using |action|, |method|,
    ## |enctype|, |target|, and |novalidate| with non-|submit|-|type|
    ## |button| elements.

    ## URLs and references
    action => $HTMLURIAttrChecker,
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    target => $HTMLTargetAttrChecker,

    ## Boolean attributes
    autofocus => $AutofocusAttrChecker,
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),

    ## Enumerated attributes
    enctype => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw{
        application/x-www-form-urlencoded
        multipart/form-data
        text/plain
      }
    }),
    method => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw/get post put delete/
    }),
    replace => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw/document values/
    }),
    ## NOTE: According to Web Forms 2.0, |button| attribute has |template|
    ## attribute to support the |add| button type (as part of repetition
    ## template feature).  It conflicts with the |template| global
    ## attribute introduced as part of the data template feature.
    ## NOTE: |template| attribute as defined in Web Forms 2.0 has no
    ## author requirement.
    type => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw/button submit reset/
    }),

    ## Event handlers
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests

    value => sub {}, ## NOTE: No restriction.
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_WF2X) }
         qw/action enctype method target/),
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_WF2X) } qw/autofocus form/),
    (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
         qw/datafld dataformatas datasrc/),
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_M12N10_REC) }
         qw/name type value/),
    novalidate => FEATURE_HTML5_DEFAULT,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw/onblur onfocus tabindex/),
    (map { ($_ => FEATURE_WF2_INFORMATIVE) } qw/onformchange onforminput/),
    replace => FEATURE_WF2,
    template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO: dropped
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Inside a |label| that already has its labelable element, one
    ## more labelable element is an error; otherwise this element
    ## takes that role (flag value 2).
    if (not $self->{flag}->{has_label} or
        not $self->{flag}->{has_labelable}) {
      $self->{flag}->{has_labelable} = 2;
    } else {
      $self->{onerror}->(
        node => $item->{node},
        type => 'multiple labelable fae',
        level => $self->{level}->{must},
      );
    }

    ## ISSUE: "The value attribute must not be present unless the form
    ## [content] attribute is present.": Wrong?

    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/datasrc template ref/;

    $element_state->{id_type} = 'labelable';
  },
};
6264    
## |label| element: phrasing content, with nested |label| elements
## excluded.  While a |label| is open, |$self->{flag}->{has_label}| is
## true and |$self->{flag}->{has_labelable}| tracks whether a labelable
## element has been associated yet (0: none, 1: referenced by |for|,
## 2: set by a labelable descendant).
$Element->{$HTML_NS}->{label} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    for => sub {
      my $self = shift;
      my $attr = shift;

      ## NOTE: MUST be an ID of a labelable element.  The reference is
      ## queued in |$self->{idref}|.
      push @{$self->{idref}}, ['labelable', $attr->value, $attr];
    },
    form => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_WF2 | FEATURE_M12N10_REC,
    (map { ($_ => FEATURE_HTML5_REC) } qw/for lang/),
    form => FEATURE_HTML5_LC,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw/onblur onfocus/),
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {label => 1}});

    ## Save the enclosing context's flags so that |check_end| can
    ## restore them.
    $element_state->{has_label_original} = $self->{flag}->{has_label};
    $self->{flag}->{has_label} = 1;
    $element_state->{has_labelable_original} = $self->{flag}->{has_labelable};
    if ($item->{node}->has_attribute_ns (undef, 'for')) {
      $self->{flag}->{has_labelable} = 1;
    } else {
      $self->{flag}->{has_labelable} = 0;
    }

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    if ($self->{flag}->{has_labelable} == 1) { # has for="" but no labelable
      $self->{flag}->{has_labelable}
          = $element_state->{has_labelable_original};
    }
    if (not $element_state->{has_label_original}) {
      delete $self->{flag}->{has_label};
    }
    ## TODO: Warn if no labelable descendant? <input type=hidden>?

    ## NOTE: |<label for=a><input id=a></label>| is non-conforming.

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
  ## TODO: Tests for <nest/> in <label>
};
6318    
## |select| element: content model is (option | optgroup)*.  The
## element is registered as a labelable control in |check_start|.
$Element->{$HTML_NS}->{select} = {
  %HTMLChecker,
  ## ISSUE: HTML5 has no requirement like these:
  ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE
  ## case [HTML4].
  ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is
  ## "error" [HTML4]
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->({
    autofocus => $AutofocusAttrChecker,
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    form => $HTMLFormAttrChecker,
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
    name => $FormControlNameAttrChecker,
    ## TODO: tests for on*
    (map { ($_ => $HTMLEventHandlerAttrChecker) }
         qw/onformchange onforminput oninput oninvalid/),
    ## The callback restricts the integer to values greater than zero.
    size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_WF2,
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_WF2X) } qw/autofocus form/),
    (map { ($_ => FEATURE_WF2) } qw/data oninput oninvalid/),
    (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
         qw/datafld dataformatas datasrc/),
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_M12N10_REC) }
         qw/multiple name size/),
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw/onblur onchange onfocus tabindex/),
    (map { ($_ => FEATURE_WF2_INFORMATIVE) } qw/onformchange onforminput/),
    (map { ($_ => FEATURE_HTML20_RFC) } qw/sdaform sdapref/),
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Inside a |label| that already has its labelable element, one
    ## more labelable element is an error; otherwise this element
    ## takes that role (flag value 2).
    if (not $self->{flag}->{has_label} or
        not $self->{flag}->{has_labelable}) {
      $self->{flag}->{has_labelable} = 2;
    } else {
      $self->{onerror}->(
        node => $item->{node},
        type => 'multiple labelable fae',
        level => $self->{level}->{must},
      );
    }

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/data datasrc template ref/;

    $element_state->{id_type} = 'labelable';
  },
  check_child_element => sub {
    ## NOTE: (option | optgroup)*

    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(
        node => $child_el,
        type => 'element not allowed:minus',
        level => $self->{level}->{must},
      );
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
             ($child_nsuri eq $HTML_NS and
              ($child_ln eq 'option' or $child_ln eq 'optgroup'))) {
      ## Explicitly allowed by an ancestor, or part of the content
      ## model: nothing to report.
    } else {
      $self->{onerror}->(
        node => $child_el,
        type => 'element not allowed',
        level => $self->{level}->{must},
      );
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only text flagged as significant by the caller is an error.
    if ($has_significant) {
      $self->{onerror}->(
        node => $child_node,
        type => 'character not allowed',
        level => $self->{level}->{must},
      );
    }
  },
};
6412 wakaba 1.1
## |datalist| element: content model is either phrasing content or
## zero or more |option| elements.  |$element_state->{phase}| records
## which alternative has been chosen: it starts as |any| and is
## switched to |phrasing| or |option| by the first child that decides.
$Element->{$HTML_NS}->{datalist} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
  }, {
    %HTMLAttrStatus,
    data => FEATURE_WF2,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{phase} = 'any'; # any | phrasing | option

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/data template ref/;

    $element_state->{id_type} = 'datalist';
  },
  ## NOTE: phrasing | option*
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $phase = $element_state->{phase};
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(
        node => $child_el,
        type => 'element not allowed:minus',
        level => $self->{level}->{must},
      );
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($phase eq 'phrasing') {
      unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(
          node => $child_el,
          type => 'element not allowed:phrasing',
          level => $self->{level}->{must},
        );
      }
    } elsif ($phase eq 'option') {
      unless ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $self->{onerror}->(
          node => $child_el,
          type => 'element not allowed',
          level => $self->{level}->{must},
        );
      }
    } elsif ($phase eq 'any') {
      ## The first child element decides which alternative applies.
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{phase} = 'phrasing';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $element_state->{phase} = 'option';
      } else {
        $self->{onerror}->(
          node => $child_el,
          type => 'element not allowed',
          level => $self->{level}->{must},
        );
      }
    } else {
      die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      my $phase = $element_state->{phase};
      if ($phase eq 'any') {
        ## Significant text selects the phrasing alternative.
        $element_state->{phase} = 'phrasing';
      } elsif ($phase ne 'phrasing') {
        $self->{onerror}->(
          node => $child_node,
          type => 'character not allowed',
          level => $self->{level}->{must},
        );
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{phase} ne 'phrasing') {
      ## NOTE: Since the content model explicitly allows a |datalist|
      ## element being empty, we don't raise "no significant content"
      ## error for this element when there is no element.  (We should
      ## raise an error for |<datalist><br></datalist>|, however.)
      ## NOTE: As a side-effect, when the |datalist| element only
      ## contains non-conforming content, the |phase| flag has not
      ## changed from |any|, so no "no significant content" error is
      ## raised either.
      $HTMLChecker{check_end}->(@_);
    } else {
      if ($element_state->{has_significant}) {
        $item->{real_parent_state}->{has_significant} = 1;
      } elsif (not $item->{transparent}) {
        $self->{onerror}->(
          node => $item->{node},
          type => 'no significant content',
          level => $self->{level}->{should},
        );
      }
    }
  },
};
6512 wakaba 1.49
## |optgroup| element: content model is option*; the |label|
## attribute is required.
$Element->{$HTML_NS}->{optgroup} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub {}, ## No check on the value itself.
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    label => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
  }),
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    if (not $item->{node}->has_attribute_ns (undef, 'label')) {
      $self->{onerror}->(
        node => $item->{node},
        type => 'attribute missing',
        text => 'label',
        level => $self->{level}->{must},
      );
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(
        node => $child_el,
        type => 'element not allowed:minus',
        level => $self->{level}->{must},
      );
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
             ($child_nsuri eq $HTML_NS and $child_ln eq 'option')) {
      ## Explicitly allowed by an ancestor, or an |option| element:
      ## nothing to report.
    } else {
      $self->{onerror}->(
        node => $child_el,
        type => 'element not allowed',
        level => $self->{level}->{must},
      );
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only text flagged as significant by the caller is an error.
    if ($has_significant) {
      $self->{onerror}->(
        node => $child_node,
        type => 'character not allowed',
        level => $self->{level}->{must},
      );
    }
  },
};
6561    
## |option| element: uses the text-only checker; |label| and |value|
## accept any string.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    ## ISSUE: Not a "boolean attribute"
    selected => $GetHTMLBooleanAttrChecker->('selected'),
    ## NOTE: No restriction.
    (map { ($_ => sub {}) } qw/label value/),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_M12N10_REC) }
         qw/label selected value/),
    (map { ($_ => FEATURE_HTML20_RFC) } qw/sdaform sdapref/),
  }),
};
6582 wakaba 1.49
## |textarea| element: uses the text-only checker.  The element is
## registered as a labelable control in |check_start|; cross-attribute
## requirements (|pattern|/|title|, |wrap=hard|/|cols|) are checked in
## |check_attrs2|.
$Element->{$HTML_NS}->{textarea} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
    autofocus => $AutofocusAttrChecker,
    ## The callback restricts the integer to values greater than zero.
    cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    form => $HTMLFormAttrChecker,
    ## TODO: inputmode [WF2]
    maxlength => sub {
      ## Validate the attribute as a non-negative integer, then compare
      ## the length of the element's text content against it.
      my ($self, $attr, $item, $element_state) = @_;

      $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);

      if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
        ## NOTE: Applying the rules for parsing non-negative integers
        ## results in a number.
        my $max_allowed_value_length = 0+$1;

        ## ISSUE: "The the purposes of this requirement," (typo)

        ## ISSUE: This constraint is applied w/o CRLF normalization to
        ## |value| attribute, but w/ CRLF normalization to
        ## concept-value.
        my $value = $item->{node}->text_content;
        if (defined $value) {
          my $codepoint_length = length $value;

          if ($codepoint_length > $max_allowed_value_length) {
            $self->{onerror}->(node => $item->{node},
                               type => 'value too long',
                               level => $self->{level}->{must});
          }
        }
      }
    },
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
    ## NOTE: |oninput| used to be listed twice in this hash; the
    ## duplicate entry (same checker) has been removed.
    oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
    oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
    pattern => $PatternAttrChecker,
    placeholder => $PlaceholderAttrChecker,
    readonly => $GetHTMLBooleanAttrChecker->('readonly'),
    required => $GetHTMLBooleanAttrChecker->('required'),
    rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
    wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
    'accept-charset' => FEATURE_HTML2X_RFC,
    accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
    autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
    cols => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    form => FEATURE_HTML5_LC | FEATURE_WF2X,
    inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
    lang => FEATURE_HTML5_REC,
    maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
    onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
    oninput => FEATURE_WF2, ## TODO: tests
    oninvalid => FEATURE_WF2, ## TODO: tests
    onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
    placeholder => FEATURE_HTML5_LC,
    readonly => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    required => FEATURE_HTML5_LC | FEATURE_WF2X,
    rows => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    wrap => FEATURE_HTML5_LC | FEATURE_WF2X,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Inside a |label| that already has its labelable element, one
    ## more labelable element is an error; otherwise this element
    ## takes that role (flag value 2).
    if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}->{must});
    } else {
      $self->{flag}->{has_labelable} = 2;
    }

    $element_state->{uri_info}->{data}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;

    $element_state->{id_type} = 'labelable';
  },
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    ## |pattern| without |title|: SHOULD-level error.
    if ($item->{node}->has_attribute_ns (undef, 'pattern') and
        not $item->{node}->has_attribute_ns (undef, 'title')) {
      ## NOTE: WF2 (dropped by HTML5)
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'title',
                         level => $self->{level}->{should});
    }

    ## |wrap=hard| requires |cols|.
    unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
      my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
      if (defined $wrap) {
        $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
        if ($wrap eq 'hard') {
          $self->{onerror}->(node => $item->{node},
                             type => 'attribute missing',
                             text => 'cols',
                             level => $self->{level}->{must});
        }
      }
    }
  },
};
6710 wakaba 1.49
## Checker definition for the |output| element.
$Element->{$HTML_NS}->{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |for|: "Unordered set of unique space-separated tokens".
    ## The first occurrence of each token is queued in
    ## |$self->{idref}| with the type 'any'; every repetition is
    ## reported as a 'duplicate token' error.
    for => sub {
      my ($self, $attr) = @_;

      my %seen;
      for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
        next unless length $token; # leading separator yields ''

        if ($seen{$token}) {
          $self->{onerror}->(node => $attr, type => 'duplicate token',
                             value => $token,
                             level => $self->{level}->{must});
        } else {
          $seen{$token} = 1;
          push @{$self->{idref}}, ['any', $token, $attr];
        }
      }
    },
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
  }, {
    ## Attribute status table; entries after |%HTMLAttrStatus|
    ## override the shared defaults.
    %HTMLAttrStatus,
    for => FEATURE_HTML5_LC | FEATURE_WF2X,
    form => FEATURE_HTML5_LC | FEATURE_WF2X,
    name => FEATURE_HTML5_LC | FEATURE_WF2X,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  }),
};
6747    
## Checker definition for the deprecated |isindex| element.
$Element->{$HTML_NS}->{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->({
    prompt => sub {}, ## NOTE: Text [M12N]
  }, {
    ## Attribute status table; entries after |%HTMLAttrStatus|
    ## override the shared defaults.
    %HTMLAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    style => FEATURE_HTML5_REC,
    title => FEATURE_HTML5_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register how URI-valued attributes of this element are used.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
6775 wakaba 1.49
## Checker definition for the |script| element.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |charset| is reported as not allowed when the owner element
    ## has no |src| attribute; its value is then handed to the shared
    ## charset checker in either case.
    charset => sub {
      my ($self, $attr) = @_;

      my $owner = $attr->owner_element;
      unless ($owner->has_attribute_ns (undef, 'src')) {
        $self->{onerror}->(type => 'attribute not allowed',
                           node => $attr,
                           level => $self->{level}->{must});
      }

      $HTMLCharsetChecker->($attr->value, @_);
    },
    language => sub {}, ## NOTE: No syntax constraint according to HTML4.
    src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
  }, {
    %HTMLAttrStatus,
    async => FEATURE_HTML5_WD,
    charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    href => FEATURE_RDFA_REC,
    id => FEATURE_HTML5_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};

    if ($el->has_attribute_ns (undef, 'src')) {
      ## An external script: the element itself has to be empty.
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $el->get_attribute_ns (undef, 'type');
      my $language = $el->get_attribute_ns (undef, 'language');

      ## Decide the script type: an empty |type| or |language| means
      ## JavaScript; otherwise |type| wins over |language|, and
      ## JavaScript is the fallback when neither is specified.
      my $type_is_empty = (defined $type and $type eq '');
      my $language_is_empty = (defined $language and $language eq '');
      if ($type_is_empty or $language_is_empty) {
        $type = 'text/javascript';
      } elsif (defined $type) {
        # Use the |type| attribute value as is.
      } elsif (defined $language) {
        $type = 'text/' . $language;
      } else {
        $type = 'text/javascript';
      }

      ## Reduce a well-formed media type to lowercase "type/subtype".
      if ($type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
        $type = "$1/$2";
        $type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
        ## TODO: Though we strip parameters here, they should not be
        ## ignored for the purpose of conformance checking...
      }
      $element_state->{script_type} = $type;
    }

    for my $attr_name (qw(src template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }

    ## Accumulator for the text content (filled by |check_child_text|).
    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $is_excluded = ($self->{minus_elements}->{$child_nsuri}->{$child_ln}
                       and $IsInHTMLInteractiveContent->($child_el,
                                                         $child_nsuri,
                                                         $child_ln));
    if ($is_excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      # Explicitly allowed by an ancestor.
    } else {
      if ($element_state->{must_be_empty}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:empty',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    if ($element_state->{must_be_empty} and $has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed:empty',
                         level => $self->{level}->{must});
    }

    ## The text is collected even when it has been reported as an error.
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    unless ($element_state->{must_be_empty}) {
      my $script_type = $element_state->{script_type};
      if ($script_type =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML script lang',
                           text => $script_type,
                           level => $self->{level}->{uncertain});
        ## ISSUE: Should we raise some kind of error for
        ## <script type="text/xml">aaaaa</script>?
        ## NOTE: ^^^ This is why we throw an "uncertain" error.
      } else {
        ## Hand the collected script text over as a subdocument.
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $script_type,
                             is_char_string => 1});
      }

      $HTMLChecker{check_end}->(@_);
    }
  },
  ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
  ## NOTE: "When used to include script data, the script data must be embedded
  ## inline, the format of the data must be given using the type attribute,
  ## and the src attribute must not be specified." - not testable.
  ## TODO: It would be possible to err <script type=text/plain src=...>
};
6900 wakaba 1.25 ## ISSUE: Significant check and text child node
6901 wakaba 1.1
6902     ## NOTE: When script is disabled.
## Checker definition for the |noscript| element.  The content rules
## depend on whether the |in_head| flag is set.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |noscript| is reported when the owner document is not an
    ## HTML document.
    my $doc = $item->{node}->owner_document;
    unless ($doc->manakai_is_html) {
      $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript',
                         level => $self->{level}->{must});
    }

    ## Outside of |head|, exclude nested |noscript| elements.
    unless ($self->{flag}->{in_head}) {
      $self->_add_minus_elements ($element_state,
                                  {$HTML_NS => {noscript => 1}});
    }

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{flag}->{in_head}) {
      ## Shared reporter for children not allowed in head |noscript|.
      my $reject = sub {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head noscript',
                           level => $self->{level}->{must});
      };
      my $is_html = sub { $child_nsuri eq $HTML_NS and $child_ln eq $_[0] };

      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
          $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{level}->{must});
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        # Explicitly allowed by an ancestor.
      } elsif ($is_html->('link')) {
        # |link| is always accepted here.
      } elsif ($is_html->('style')) {
        ## |style| is accepted unless it has the |scoped| attribute.
        if ($child_el->has_attribute_ns (undef, 'scoped')) {
          $reject->();
        }
      } elsif ($is_html->('meta')) {
        ## |meta| is accepted only with an |http-equiv| attribute
        ## whose value is not "content-type".
        my $http_equiv_attr
            = $child_el->get_attribute_node_ns (undef, 'http-equiv');
        if ($http_equiv_attr) {
          ## TODO: case
          if (lc ($http_equiv_attr->value) eq 'content-type') {
            $reject->();
          }
        } else {
          $reject->();
        }
      } else {
        $reject->();
      }
    } else {
      $HTMLTransparentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    if ($self->{flag}->{in_head}) {
      ## Significant text is reported in head |noscript|.
      if ($has_significant) {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $HTMLTransparentChecker{check_child_text}->(@_);
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    if ($self->{flag}->{in_head}) {
      $HTMLChecker{check_end}->(@_);
    } else {
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
6994 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
6995 wakaba 1.1
## Checker definition for the dropped |event-source| element.
$Element->{$HTML_NS}->{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_LC_DROPPED,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register how URI-valued attributes of this element are used.
    for my $attr_name (qw(src template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
7013    
## Checker definition for the dropped |eventsource| element.
$Element->{$HTML_NS}->{eventsource} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_DROPPED,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register how URI-valued attributes of this element are used.
    for my $attr_name (qw(src template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
7031    
## Checker definition for the |details| element.  It starts from a
## copy of the |fieldset| definition and replaces |status| and
## |check_attrs|.
$Element->{$HTML_NS}->{details} = {
  %{$Element->{$HTML_NS}->{fieldset}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {open => $GetHTMLBooleanAttrChecker->('open')},
    {
      %HTMLAttrStatus,
      open => FEATURE_HTML5_LC,
    },
  ),
};
7042    
## Checker definition for the |datagrid| element.
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |a| and |datagrid| are excluded until |check_end| undoes this.
    my $excluded = {$HTML_NS => {a => 1, datagrid => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
7070    
## Checker definition for the |command| element.
$Element->{$HTML_NS}->{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    ## |type| accepts the keywords "command", "checkbox" and "radio".
    ## Any other value is reported as 'invalid attribute value'.
    type => sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      ## The keywords are matched ASCII case-insensitively, so fold
      ## the (copied) value before looking it up.
      $value =~ tr/A-Z/a-z/;
      unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                           level => $self->{level}->{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_DROPPED, # HTML5 revision 3067
    disabled => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register how URI-valued attributes of this element are used;
    ## |icon| is recorded with the 'embedded' type.
    $element_state->{uri_info}->{icon}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
7107    
## Checker definition for the |bb| element.
$Element->{$HTML_NS}->{bb} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1})},
    {
      %HTMLAttrStatus,
      type => FEATURE_HTML5_WD,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Interactive content is excluded until |check_end| undoes this.
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    ## NOTE(review): The definition is based on
    ## |%HTMLPhrasingContentChecker| but the end check is delegated to
    ## the transparent checker - confirm whether this is intended.
    $HTMLTransparentChecker{check_end}->(@_);
  },
};
7131    
## Checker definition for the |menu| element.  Its content is either
## a sequence of |li| elements or phrasing content; which of the two
## applies is decided by the first significant child and tracked in
## |$element_state->{phase}| ('li or phrasing', 'li' or 'phrasing').
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## ISSUE: <menu id=""><p contextmenu=""> match?  (In the current
    ## implementation, it does not match.)
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_DROPPED,
    ## The key has to be |compact| (it used to be misspelled as
    ## |compat|) so that it applies to the attribute checked above.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';

    ## Remember the outer value of the |in_menu| flag so that
    ## |check_end| only clears the flag if this element has set it.
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    $element_state->{id_type} = 'menu';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      ## An |li| child: allowed unless phrasing content has been seen.
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      ## A phrasing child: allowed unless an |li| has been seen.
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text is handled in the same way as a phrasing child.
    if ($has_significant) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
7224    
## Checker definition for the |datatemplate| element.  Apart from
## elements covered by the plus/minus sets, only HTML |rule| children
## are accepted, and significant text is reported.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $is_excluded = ($self->{minus_elements}->{$child_nsuri}->{$child_ln}
                       and $IsInHTMLInteractiveContent->($child_el,
                                                         $child_nsuri,
                                                         $child_ln));
    if ($is_excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      # Explicitly allowed by an ancestor.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'rule') {
      # The only child element allowed by this definition.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  is_xml_root => 1,
};
7255    
## Checker definition for the |rule| element.  Children and text are
## not checked at all (both callbacks are no-ops).
$Element->{$HTML_NS}->{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    condition => $HTMLSelectorsAttrChecker,
    mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
  }, {
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |nest| is added to the plus set until |check_end| undoes this.
    $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});

    ## Remember the outer value of the |in_rule| flag so that
    ## |check_end| only clears the flag if this element has set it.
    $element_state->{in_rule_original} = $self->{flag}->{in_rule};
    $self->{flag}->{in_rule} = 1;

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_plus_elements ($element_state);
    unless ($element_state->{in_rule_original}) {
      delete $self->{flag}->{in_rule};
    }

    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
7291    
## Checker definition for the |nest| element.
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    filter => $HTMLSelectorsAttrChecker,
    ## |mode| has to consist of one or more characters, none of which
    ## is a space character (U+0009, U+000A, U+000C, U+000D, U+0020).
    mode => sub {
      my ($self, $attr) = @_;

      unless ($attr->value =~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/) {
        $self->{onerror}->(node => $attr, type => 'mode:syntax error',
                           level => $self->{level}->{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
};
7311    
## Checker definition for the |legend| element.  Children and text
## are checked as flow content when the parent state has the
## |in_figure| flag, and as phrasing content otherwise.
$Element->{$HTML_NS}->{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
#    align => $GetHTMLEnumeratedAttrChecker->({
#      top => 1, bottom => 1, left => 1, right => 1,
#    }),
    form => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
    form => FEATURE_HTML5_DROPPED,
    lang => FEATURE_HTML5_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $base = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker : \%HTMLPhrasingContentChecker;
    $base->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    my $base = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker : \%HTMLPhrasingContentChecker;
    $base->{check_child_text}->(@_);
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |figure| is excluded until |check_end| undoes this.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {figure => 1}});

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # legend
7358 wakaba 1.1
## Checker definition for the |div| element.
$Element->{$HTML_NS}->{div} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register how URI-valued attributes of this element are used.
    for my $attr_name (qw(datasrc template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
7383    
## Checker definition for the deprecated |center| element.  It has no
## attribute-specific checkers, only a status table.
$Element->{$HTML_NS}->{center} = {
  %HTMLFlowContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
};
7393    
## Checker definition for the |font| element (dropped from HTML5,
## deprecated in M12N 1.0 according to the status below).
$Element->{$HTML_NS}->{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |size|, |color|, |face|
    },
    {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      color => FEATURE_M12N10_REC_DEPRECATED,
      dir => FEATURE_HTML5_REC,
      face => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_REC,
      lang => FEATURE_HTML5_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
      style => FEATURE_HTML5_REC,
      title => FEATURE_HTML5_REC,
    },
  ),
  ## NOTE: When the |font| element was defined in the HTML5 specification,
  ## it was allowed only in a document with the WYSIWYG signature.  The
  ## checker does not check whether there is the signature, since the
  ## signature is dropped, too, and has never been implemented.  (In addition,
  ## for any |font| element an "element not defined" error is raised anyway,
  ## such that we don't have to raise an additional error.)
};
7418 wakaba 1.49
## Checker definition for the deprecated |basefont| element.
$Element->{$HTML_NS}->{basefont} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: color, face, size
    },
    {
      %HTMLAttrStatus,
      color => FEATURE_M12N10_REC_DEPRECATED,
      face => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
7432    
7433 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
7434     ## class title id cols rows onload onunload style(x10)
7435     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7436     ## noframes Common, lang(xhtml10)
7437    
7438 wakaba 1.100 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7439 wakaba 1.56
7440 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7441     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7442     ## xmp, listing sdapref[HTML2,0]
7443    
7444 wakaba 1.56 =pod
7445    
7446 wakaba 1.61 HTML 2.0 nextid @n
7447    
7448     RFC 2659: CERTS CRYPTOPTS
7449    
7450     ISO-HTML: pre-html, divN
7451 wakaba 1.82
7452     XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7453     di (Common), nl (Common), handler (Common, type), standby (Common),
7454     summary (Common)
7455    
7456 wakaba 1.97 Access & XHTML2: access (LC)
7457 wakaba 1.82
7458     XML Events & XForms (for XHTML2 support; very, very low priority)
7459 wakaba 1.61
7460 wakaba 1.56 =cut
7461 wakaba 1.61
7462     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7463     ## We added them only to |a|. |link| and |form| might also allow them
7464     ## in theory.
7465 wakaba 1.1
## Set the |loaded| flag of the HTML namespace entry.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

1; # A module file has to end with a true value.

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24