/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.169 - (hide annotations) (download)
Sun Jun 28 07:02:30 2009 UTC (16 years ago) by wakaba
Branch: MAIN
Changes since 1.168: +14 -2 lines
allow flow content in <caption> (html5 rev.3329)

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.117 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6    
7 wakaba 1.1 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8    
## HTML5 feature status constants.  Each constant is a
## Whatpm::ContentChecker status value, ORed with FEATURE_ALLOWED for
## the features that are still part of HTML5.

## Part of HTML5, the implemented status.
sub FEATURE_HTML5_COMPLETE () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC() |
      Whatpm::ContentChecker::FEATURE_ALLOWED()
}

## Part of HTML5, the awaiting implementation feedback status.
sub FEATURE_HTML5_CR () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR() |
      Whatpm::ContentChecker::FEATURE_ALLOWED()
}

## Part of HTML5, the last call of comments status.
sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC() |
      Whatpm::ContentChecker::FEATURE_ALLOWED()
}

## Part of HTML5, but in the being considered for removal status.
sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD() |
      Whatpm::ContentChecker::FEATURE_ALLOWED()
}

## Part of HTML5, the working draft status.
sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD() |
      Whatpm::ContentChecker::FEATURE_ALLOWED()
}

## Part of HTML5, the first draft status.
sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD() |
      Whatpm::ContentChecker::FEATURE_ALLOWED()
}

## Part of HTML5, but not annotated.
sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD() |
      Whatpm::ContentChecker::FEATURE_ALLOWED()
}

## Was part of HTML5, in a status before the last call of comments,
## but then dropped (hence no FEATURE_ALLOWED flag).
sub FEATURE_HTML5_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}

## Was part of HTML5, in the last call of comments status, but then
## dropped (hence no FEATURE_ALLOWED flag).
sub FEATURE_HTML5_LC_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}
55 wakaba 1.154
## Web Forms 2.0 (WF2) and RDFa feature status constants.

## Defined in WF2 (whether deprecated or not) and then incorporated
## into the HTML5 spec.
sub FEATURE_WF2X () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}

## Features introduced or modified in WF2, which were not merged into
## HTML5.
sub FEATURE_WF2 () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}

## Features mentioned in WF2's informative appendix A, which were not
## merged into HTML5.
sub FEATURE_WF2_INFORMATIVE () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}

## RDFa feature with the REC status value.
sub FEATURE_RDFA_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}

## A feature that was defined in an RDFa last call working draft, but
## then dropped.
sub FEATURE_RDFA_LC_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}
80 wakaba 1.58
## NOTE: XHTML Role LCWD has almost no information on how the |role|
## attribute can be used; the only requirement on that matter is:
## "the attribute MUST be referenced using its namespace-qualified
## form" (and this is a host language conformance requirement!).
sub FEATURE_ROLE_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}

## XHTML 2.0 Editor's Draft, in which the namespace URI is
## "http://www.w3.org/1999/xhtml".
sub FEATURE_XHTML2_ED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}

## XHTML Basic 1.1 Recommendation, new features (not in XHTML M12N).
sub FEATURE_XHTMLBASIC11_CR () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}

## XHTML Basic 1.1 Recommendation, new but deprecated features.
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC() |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO()
}

## Ruby feature; mapped to the CR status value.
sub FEATURE_RUBY_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}

## XHTML M12N 1.1 Recommendation, new features (not in 1.0).
sub FEATURE_M12N11_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}
115    
## NOTE: M12N10 status is based on its abstract module definition.
## It contains a number of problems.  (However, again, it's a REC!)

## NOTE: Oh, XHTML m12n 1.0 passed the CR phase!  W3C Process sucks!
sub FEATURE_M12N10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}

## M12N 1.0 feature that is additionally flagged as deprecated.
sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC() |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO()
}

## NOTE: XHTML10 status is based on its transitional and frameset DTDs
## (second edition).  Only attributes missing from the M12N10 abstract
## definition are added.
sub FEATURE_XHTML10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}

## NOTE: Diff from HTML4.  (Informative documentation.)
sub FEATURE_ISOHTML_PREPARATION () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}

## NOTE: HTML4 status is based on its transitional and frameset DTDs
## (HTML 4.01).  Only attributes missing from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
144    
145     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
146     ## rather than presentational attributes (deprecated or not deprecated).
147 wakaba 1.48
## NOTE: Diff from HTML4.  The deprecation flag corresponds to a
## lowercase normative "should".
sub FEATURE_HTML32_REC_OBSOLETE () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR() |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD()
}

## Experimental RFC.
sub FEATURE_RFC2659 () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}

## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
## (Proposed Standard, obsolete.)
sub FEATURE_HTML2X_RFC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}

## NOTE: Diff from HTML 2.0.  (Experimental RFC, obsolete.)
sub FEATURE_RFC1942 () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}

## NOTE: Diff from HTML 3.2.  (Proposed Standard, obsolete.)
sub FEATURE_HTML20_RFC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}
173 wakaba 1.58
174 wakaba 1.29 ## December 2007 HTML5 Classification
175    
## Metadata content, as {namespace URI => {local name => 1}}.
##
## NOTE: A |meta| with no |name| element is not allowed as a metadata
## content other than |head| element.
## NOTE: RDF is mentioned in the HTML5 spec.
## TODO: Other RDF elements?
my $HTMLMetadataContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw/
      title base link style script noscript
      event-source eventsource
      command datatemplate
      meta
    /
  },
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
189    
## Flow content, as {namespace URI => {local name => 1}}.
my $HTMLFlowContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/
      section nav article blockquote aside
      h1 h2 h3 h4 h5 h6 header
      footer address p hr dialog pre
      ol ul dl figure map table
      form fieldset
    /),

    ## ISSUE: |details| and |datagrid| are "Flow element" in the spec;
    ## |div| has no category in the spec.
    (map { ($_ => 1) } qw/details datagrid datatemplate div/),

    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    style => 1,

    (map { ($_ => 1) } qw/
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo ruby
      script noscript event-source eventsource
      command bb
      input button label select datalist
      textarea output
    /),

    ## ISSUE: |datagrid| is an "Interactive element" in the spec.
    datagrid => 1,

    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    (map { ($_ => 1) } qw/a ins del font/),

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
    menu => 1,

    (map { ($_ => 1) } qw/img iframe embed object video audio canvas/),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
232    
## Sectioning content, as {namespace URI => {local name => 1}}.
## NOTE: |body| is only allowed in |html| element.
my $HTMLSectioningContent = {
  $HTML_NS => {map { ($_ => 1) } qw/section nav article aside body/},
};

## Sectioning roots.
my $HTMLSectioningRoot = {
  $HTML_NS => {map { ($_ => 1) } qw/blockquote datagrid figure td/},
};

## Heading content.
my $HTMLHeadingContent = {
  $HTML_NS => {map { ($_ => 1) } qw/h1 h2 h3 h4 h5 h6 header/},
};
252    
## Phrasing content, as {namespace URI => {local name => 1}}.
## NOTE: All phrasing content is also flow content.
## NOTE: Non-inter-element-whitespace text nodes are phrasing content
## too, but cannot be listed in this table.
my $HTMLPhrasingContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo ruby
      script noscript event-source eventsource
      command bb
      input button label select datalist
      textarea output
    /),

    ## ISSUE: |datagrid| is an "Interactive element" in the spec.
    datagrid => 1,

    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    (map { ($_ => 1) } qw/a ins del font/),

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
    menu => 1,

    (map { ($_ => 1) } qw/img iframe embed object video audio canvas/),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
284    
285 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
286 wakaba 1.29
## Interactive content, as {namespace URI => {local name => 1}}.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/
      a
      label input button select textarea
      details datagrid bb
    /),

    ## NOTE: When "controls" attribute is specified.
    (map { ($_ => 1) } qw/video audio/),

    ## NOTE: When "type=toolbar" attribute is specified.
    menu => 1,
  },
};
300    
## NOTE: Labelable form-associated elements, as
## {namespace URI => {local name => 1}}.
my $LabelableFAE = {
  $HTML_NS => {map { ($_ => 1) } qw/input button select textarea/},
};
307    
308 wakaba 1.130 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
309    
310 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
311     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
312    
313     ## -- Common attribute syntax checkers
314    
315 wakaba 1.1 our $AttrChecker;
316 wakaba 1.82 our $AttrStatus;
317 wakaba 1.1
## Builds a checker for an enumerated attribute.
##
## |$states| maps a lowercased keyword to a positive value
## (conforming) or another true value such as -1 (recognized but
## non-conforming).  Any other attribute value is reported as
## 'enumerated:invalid'.  The returned checker takes ($self, $attr).
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    ## TODO: ASCII case insensitivity?
    my $state = $states->{lc $attr->value};
    my $error_type = $state > 0 ? undef
        : $state ? 'enumerated:non-conforming'
        : 'enumerated:invalid';
    $self->{onerror}->(node => $attr, type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  };
}; # $GetHTMLEnumeratedAttrChecker
334    
## Builds a checker for a boolean attribute named |$local_name|.
##
## The lowercased attribute value must be either the empty string or
## the attribute's own name; otherwise 'boolean:invalid' is reported.
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;
    my $normalized = lc $attr->value; ## TODO: case
    if ($normalized ne $local_name and $normalized ne '') {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLBooleanAttrChecker
346    
## Unordered set of unique space-separated tokens.
##
## Builds a checker that splits the attribute value on HTML space
## characters, reports every repeated token as 'duplicate token', and,
## when |$allowed_words| is defined, reports every first occurrence of
## a token that is not a true-valued key of it as 'word not allowed'.
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my $allowed_words = shift;
  return sub {
    my ($self, $attr) = @_;
    my %seen;
    my @tokens = grep {length $_}
        split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
    for my $token (@tokens) {
      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
      } else {
        $seen{$token} = 1;
        unless (not defined $allowed_words or $allowed_words->{$token}) {
          $self->{onerror}->(node => $attr, type => 'word not allowed',
                             value => $token,
                             level => $self->{level}->{must});
        }
      }
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
373 wakaba 1.8
## |rel| attribute (set of space separated tokens, whose allowed
## values are defined by the section on link types).
##
## Arguments: ($a_or_area, $todo, $self, $attr, $item, $element_state).
## |$a_or_area| is used as the index into each link type's |effect|
## array and, when true, forces the |href| to be treated as a
## hyperlink.  Side effects: reports errors through
## |$self->{onerror}|, records unique link types in
## |$self->{has_link_type}|, sets |$todo->{has_hyperlink_link_type}|,
## classifies |$element_state->{uri_info}->{href}| and stores the token
## set in |$element_state->{link_rel}|.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Collect the tokens.  A repeated |up| is explicitly allowed; any
  ## other repeated token is an error.
  my %word;
  for my $token (grep {length $_}
                 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
    }
  }
  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  my $is_hyperlink;
  my $is_resource;
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $word (keys %word) {
    my $def = $LinkType->{$word};
    unless (defined $def) {
      $self->{onerror}->(node => $attr,
                         type => 'unknown link type',
                         value => $word,
                         level => $self->{level}->{uncertain});
      next;
    }

    ## Effect of this link type in the current context, if any.
    my $effect = $def->{effect}->[$a_or_area];

    if ($def->{status} eq 'accepted' or $def->{status} eq 'proposal') {
      if ($def->{status} eq 'proposal') {
        $self->{onerror}->(node => $attr,
                           type => 'link type:proposed',
                           value => $word,
                           level => $self->{level}->{should});
      }
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context',
                           value => $word,
                           level => $self->{level}->{must});
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming',
                         value => $word,
                         level => $self->{level}->{must});
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$word}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate',
                           value => $word,
                           level => $self->{level}->{must});
      } else {
        $self->{has_link_type}->{$word} = 1;
      }
    }

    ## |alternate| is classified after the loop, since its meaning
    ## depends on whether |stylesheet| is also present.
    if (defined $effect and $word ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appear they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches the pattern for the rel=pingback
  ## link, which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than
  ## predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
489 wakaba 1.20
490     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
491 wakaba 1.1
## URI (or IRI)
##
## Checks that the attribute value is an IRI reference and registers
## the attribute as a URI-bearing attribute.
##
## Arguments: ($self, $attr, $item, $element_state).
## Side effects: errors from the URI checker are forwarded to
## |$self->{onerror}| with |node| set to the attribute;
## |$self->{has_uri_attr}| is set; the attribute node is stored in
## |$element_state->{uri_info}->{<attribute name>}|, and that same
## record is appended to |$self->{return}->{uri}->{<value>}|.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an
  ## absolute reference with optional fragment identifier.)
  my $value = $attr->value;
  ## The level table must be passed as the third argument, inside the
  ## call's parentheses, as the space-separated URI list checker does.
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
508    
## A space separated list of one or more URIs (or IRIs).
##
## Every item of the list is checked as an IRI reference (errors carry
## the item as |value| and its position as |index|) and is recorded in
## |$self->{return}->{uri}| with a type derived from the attribute
## name (|ping|, |profile| or |archive|).
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  my %type_for = (ping => 'action',
                  profile => 'namespace',
                  archive => 'resource');
  my $type = $type_for{$attr->name};

  my $index = 0;
  for my $value (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    my $report = sub {
      $self->{onerror}->(value => $value, @_, node => $attr, index => $index);
    };
    Whatpm::URIChecker->check_iri_reference ($value, $report, $self->{level});

    ## TODO: absolute
    my $record = {node => $attr, type => {$type => 1}};
    push @{$self->{return}->{uri}->{$value} ||= []}, $record;

    $index++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a
  ## relative reference, i.e. same as base URI.)
  ## ISSUE: What is "space"?
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
537    
## Pattern for an e-mail address of the form dot-atom "@" dot-atom.
## The pattern is not anchored; callers add |\A| and |\z| as needed.
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/;
  qr/$dot_atom\@$dot_atom/;
};
544    
## Valid global date and time (and other date/time microsyntaxes).
##
## Builds a checker that parses the attribute value with the
## |Message::Date| method |parse_<$type>|.  Parser errors other than
## 'date value not supported' are forwarded to |$self->{onerror}|.
## The parse result is stored in
## |$element_state->{date_value}->{<attribute name>}|; when parsing
## yields a false value the stored value is the empty string if an
## error was reported and |undef| otherwise.
my $GetDateTimeAttrChecker = sub ($) {
  my $type = shift;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    require Message::Date;
    my $parser = Message::Date->new;
    $parser->{level} = $self->{level};

    ## Stays undefined unless a reportable error occurs.
    my $fallback;
    $parser->{onerror} = sub {
      my %opt = @_;
      unless ($opt{type} eq 'date value not supported') {
        $self->{onerror}->(%opt, node => $attr);
        $fallback = '';
      }
    };

    my $parse_method = 'parse_' . $type;
    my $parsed = $parser->$parse_method ($attr->value);
    $element_state->{date_value}->{$attr->name} = $parsed || $fallback;
  };
}; # $GetDateTimeAttrChecker
569 wakaba 1.1
## Checks that the attribute value is an integer: an optional "-"
## followed by one or more ASCII digits.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A-?[0-9]+\z/) {
    $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLIntegerAttrChecker
578    
## Builds a checker for a non-negative integer attribute.
##
## |$range_check| is a code reference that receives the numeric value
## and returns true if it is in range.  A value that is not a sequence
## of ASCII digits is a syntax error; an out-of-range value is reported
## as 'nninteger:out of range'.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value !~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'nninteger:syntax error',
                         level => $self->{level}->{must});
    } elsif (not $range_check->($value + 0)) {
      $self->{onerror}->(node => $attr, type => 'nninteger:out of range',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
596    
## Builds a checker for a floating point number attribute.
##
## |$range_check| is a code reference that receives the numeric value
## and returns true if it is in range.  An in-range value is stored in
## |$element_state->{number_value}->{<attribute name>}|.
## TODO: scientific notation
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    my $well_formed = ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                       $value =~ /\A-?\.[0-9]+\z/);
    if (not $well_formed) {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error',
                         level => $self->{level}->{must});
    } elsif ($range_check->($value + 0)) {
      ## TODO: parse algorithm
      $element_state->{number_value}->{$attr->name} = $value + 0;
    } else {
      $self->{onerror}->(node => $attr, type => 'float:out of range',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
620    
## Checker for the |step| attribute.
##
## NOTE: A valid floating point number (> 0), or ASCII
## case-insensitive "any".
## TODO: scientific
my $StepAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  my $is_number = ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                   $value =~ /\A-?\.[0-9]+\z/);
  if ($is_number) {
    if (not $value > 0) {
      $self->{onerror}->(node => $attr, type => 'float:out of range',
                         level => $self->{level}->{must});
    }
  } elsif ($value =~ /\A[Aa][Nn][Yy]\z/) {
    ## The keyword "any" is always conforming.
  } else {
    $self->{onerror}->(node => $attr,
                       type => 'float:syntax error',
                       level => $self->{level}->{must});
  }
}; # $StepAttrChecker
643    
## HTML4 %Length;
##
## Accepts one or more ASCII digits optionally followed by "%".
## NOTE: HTML4 definition is too vague - it does not define the syntax
## of percentage value at all (!).
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A[0-9]+%?\z/) {
    $self->{onerror}->(node => $attr, type => 'length:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLLengthAttrChecker
656    
## MIME |token|: one or more characters from the listed ranges.
my $MIMEToken = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;

## Type or subtype name of a media type: 1 to 127 characters (RFC 4288).
my $TypeOrSubtype = qr/[A-Za-z0-9!#\$&.+^_-]{1,127}/; # RFC 4288

## Media type without parameters; captures the type and the subtype.
my $IMTNoParameter = qr{($TypeOrSubtype)/($TypeOrSubtype)};
660    
## "A valid MIME type, optionally with parameters. [RFC 2046]"
##
## Splits the attribute value into type, subtype and parameters, then
## hands them to |Whatpm::IMTChecker|; a value that does not match the
## syntax is reported as 'IMT:syntax error'.
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  if ($value =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#) {
    ## (type, subtype, name1 => value1, name2 => value2, ...)
    my @type = ($1, $2);
    my $param = $3;
    while ($param =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
      my ($name, $token, $quoted) = ($1, $2, $3);
      if (defined $token) {
        push @type, $name => $token;
      } else {
        ## Resolve backslash escapes, then drop the enclosing quotes.
        $quoted =~ s/\\(.)/$1/gs;
        push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
      }
    }
    require Whatpm::IMTChecker;
    my $ic = Whatpm::IMTChecker->new;
    $ic->{level} = $self->{level};
    $ic->check_imt (sub {
      $self->{onerror}->(@_, node => $attr);
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLIMTAttrChecker
697    
## Checks the attribute value as an RFC 3066 language tag by
## delegating to |Whatpm::LangTag|; reported errors get the attribute
## as |node|.
## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
## ISSUE: RFC 4646 (3066bis)?
## TODO: testdata
my $HTMLLanguageTagAttrChecker = sub {
  my ($self, $attr) = @_;
  require Whatpm::LangTag;
  my $report = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  Whatpm::LangTag->check_rfc3066_language_tag
      ($attr->value, $report, $self->{level});
}; # $HTMLLanguageTagAttrChecker
711    
## "A valid media query [MQ]"
##
## No validation is performed; every value is reported as
## 'media query' at the "uncertain" level.
## ISSUE: What is "a valid media query"?
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my %report = (node => $attr,
                type => 'media query',
                level => $self->{level}->{uncertain});
  $self->{onerror}->(%report);
}; # $HTMLMQAttrChecker
720    
## Checker for event handler content attributes.
##
## No validation is performed; every value is reported as
## 'event handler' at the "uncertain" level.
## TODO: MUST contain valid ECMAScript code matching the
## ECMAScript |FunctionBody| production. [ECMA262]
## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
## ISSUE: Automatic semicolon insertion does not apply?
## ISSUE: Other script languages?
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my %report = (node => $attr,
                type => 'event handler',
                level => $self->{level}->{uncertain});
  $self->{onerror}->(%report);
}; # $HTMLEventHandlerAttrChecker
732    
## Checker for the |form| attribute.
##
## NOTE: MUST be the ID of a |form| element.  The reference is only
## queued here, as ['form', <value>, <attribute node>] in
## |$self->{idref}|.
## ISSUE: <form id=""><input form=""> (empty ID)?
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;
  my @reference = ('form', $attr->value, $attr);
  push @{$self->{idref}}, \@reference;
}; # $HTMLFormAttrChecker
743    
## Checker for the |list| attribute.
##
## NOTE: MUST be the ID of a |datalist| element.  The reference is
## only queued here, as ['datalist', <value>, <attribute node>] in
## |$self->{idref}|.
## TODO: Warn violation to control-dependent restrictions.  For
## example, |<input type=url maxlength=10 list=a> <datalist
## id=a><option value=nonurlandtoolong></datalist>| should be
## warned.
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;
  my @reference = ('datalist', $attr->value, $attr);
  push @{$self->{idref}}, \@reference;
}; # $ListAttrChecker
756    
## Checker for the |pattern| attribute.
##
## The value is handed to |$self->{onsubdoc}| as a subdocument of media
## type 'text/x-regexp-js'.
## ISSUE: "value must match the Pattern production of ECMA 262's
## grammar" - no additional constraints (e.g. {n,m} then n>=m).
## TODO: Warn if @value does not match @pattern.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;
  my $subdoc = {
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-regexp-js',
    is_char_string => 1,
  };
  $self->{onsubdoc}->($subdoc);
}; # $PatternAttrChecker
769    
## Checker for the |accept| attribute: a comma-separated list of
## unique, ASCII case-insensitive tokens, each of which is |audio/*|,
## |video/*|, |image/*| or a media type without parameters.
##
## A repeated token is reported as 'duplicate token'; a token that is
## not a parameterless media type is reported as 'IMTnp:syntax error';
## media types are additionally checked by |Whatpm::IMTChecker|.
my $AcceptAttrChecker = sub {
  my ($self, $attr) = @_;

  my $value = $attr->value;
  $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
  ## An empty value still yields a single (empty) token, and the -1
  ## limit keeps trailing empty tokens, so that they are reported.
  my @value = length $value ? split /,/, $value, -1 : ('');
  my %has_value;
  for my $v (@value) {
    if ($has_value{$v}) {
      $self->{onerror}->(node => $attr,
                         type => 'duplicate token',
                         value => $v,
                         level => $self->{level}->{must});
      next;
    }
    $has_value{$v} = 1;

    if ($v eq 'audio/*' or $v eq 'video/*' or $v eq 'image/*') {
      #
    } elsif ($v =~ m[\A$IMTNoParameter\z]) {
      ## Copy the captures immediately.  Passing |$1| and |$2| directly
      ## would pass aliases to the capture variables, whose values the
      ## callee could change by running a regular expression of its
      ## own before it reads its arguments.
      my ($type, $subtype) = ($1, $2);

      ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
      ## define its own syntax citing RFC 4288.

      ## NOTE: Parameters not allowed.
      require Whatpm::IMTChecker;
      my $ic = Whatpm::IMTChecker->new;
      $ic->{level} = $self->{level};
      $ic->check_imt (sub {
        $self->{onerror}->(@_, node => $attr);
      }, $type, $subtype);
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'IMTnp:syntax error', ## TODOC: type
                         value => $v,
                         level => $self->{level}->{must});
    }
  }
}; # $AcceptAttrChecker
808    
## Checker for form control |name| attributes: the value must not be
## the empty string.
my $FormControlNameAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: No uniqueness constraint.

  my $control_name = $attr->value;
  if (not length $control_name) {
    $self->{onerror}->(node => $attr,
                       type => 'empty control name', ## TODOC: type
                       level => $self->{level}->{must});
  }
}; # $FormControlNameAttrChecker
820    
## Checker for |autofocus| attributes.  It runs the boolean attribute
## check, then reports 'duplicate autofocus' if this checker object
## has already seen an |autofocus| attribute.
my $AutofocusAttrChecker = sub {
  my ($self, $attr) = @_;

  ## The boolean attribute checker gets the unmodified argument list.
  my $check_boolean = $GetHTMLBooleanAttrChecker->('autofocus');
  $check_boolean->(@_);

  $self->{onerror}->(node => $attr,
                     type => 'duplicate autofocus', ## TODOC: type
                     level => $self->{level}->{must})
      if $self->{has_autofocus};

  ## Remembered so that a later |autofocus| attribute is reported.
  $self->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
833    
## Checker for |usemap| attributes.  A value starting with "#" is
## queued, without that "#", in |$self->{usemap}|; any other value is
## a 'hashref:syntax error'.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;

  ## MUST be a valid hash-name reference to a |map| element.
  my $hashref = $attr->value;
  if ($hashref !~ /^#/) {
    $self->{onerror}->(node => $attr, type => 'hashref:syntax error',
                       level => $self->{level}->{must});
  } else {
    ## NOTE: |usemap="#"| is conforming, though it identifies no |map|
    ## element according to the "rules for parsing a hash-name
    ## reference" algorithm.  The document is non-conforming anyway,
    ## since |<map name="">| (empty name) is non-conforming.
    push @{$self->{usemap}}, [substr ($hashref, 1), $attr];
  }

  ## NOTE: Space characters in hash-name references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported
}; # $HTMLUsemapAttrChecker
851    
## Valid browsing context name: any non-empty string that does not
## start with "_".
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($self, $attr) = @_;
  my $name = $attr->value;
  if (not length $name) {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  } elsif ($name =~ /^_/) {
    ## Names starting with "_" are reserved.
    $self->{onerror}->(node => $attr, type => 'window name:reserved',
                       level => $self->{level}->{must},
                       value => $name);
  }
}; # $HTMLBrowsingContextNameAttrChecker
867    
## Valid browsing context name or keyword: either a non-empty string
## not starting with "_", or one of the keywords |_blank|, |_self|,
## |_parent|, and |_top| (compared ASCII case-insensitively).
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    ## NOTE: Only A-Z are folded.  |lc| would also fold non-ASCII
    ## characters (e.g. U+212A KELVIN SIGN to "k"), so that
    ## |_blan<U+212A>| would wrongly be accepted as |_blank|.
    $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
    unless ({
      _blank => 1, _self => 1, _parent => 1, _top => 1,
    }->{$value}) {
      ## The folded value is what gets reported.
      $self->{onerror}->(node => $attr,
                         type => 'window name:reserved',
                         level => $self->{level}->{must},
                         value => $value);
    }
  } elsif (length $value) {
    #
  } else {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  }
}; # $HTMLTargetAttrChecker
889    
## Checker for attributes whose value is a group of selectors.  The
## value is parsed by |Whatpm::CSS::SelectorsParser|; errors raised by
## the parser are forwarded to |$self->{onerror}| with the attribute
## node attached.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  my $selectors = $attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  ## Pseudo-classes flagged on the parser.
  my @pseudo_class = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  @{$parser->{pseudo_class}}{@pseudo_class} = (1) x @pseudo_class;

  ## Pseudo-elements flagged on the parser.
  my @pseudo_element = qw/
    after before first-letter first-line
  /;
  @{$parser->{pseudo_element}}{@pseudo_element} = (1) x @pseudo_element;

  $parser->{level} = $self->{level};
  $parser->{onerror} = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
917    
## Checker for |accesskey| attributes: the value has to be exactly one
## character long.
my $HTMLAccesskeyAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: "character" or |%Character;| in HTML4.

  my $key = $attr->value;
  unless (length ($key) == 1) {
    $self->{onerror}->(node => $attr, type => 'char:syntax error',
                       level => $self->{level}->{html4_fact});
  }

  ## NOTE: "Note. Authors should consider the input method of the
  ## expected reader when specifying an accesskey." [HTML4] This is
  ## hard to implement, since it depends on keyboard and so on.
  ## NOTE: "We recommend that authors include the access key in label
  ## text or wherever the access key is to apply." [HTML4]
  ## (informative)
}; # $HTMLAccesskeyAttrChecker
935    
## Checks one character encoding name.
##
## Arguments: the charset name, the checker object, the attribute
## node errors are reported against, and an optional flag requesting
## the ASCII-compatibility check.  Returns the list
## |($charset, $charset_value)|: the |Message::Charset::Info| entry
## for the name (false if there is none) and the lowercased name.
##
## NOTE: The prototype is kept as is; Perl does not apply prototypes
## to calls made through a code reference.
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($charset_value, $self, $attr, $ascii_compat) = @_;

  ## NOTE: This code is used for |charset=""| attributes, |charset=|
  ## portion of the |content=""| attributes, and |accept-charset=""|
  ## attributes.

  ## NOTE: Though the case-sensitivity of |charset| attribute value
  ## is not explicitly spelled in the HTML5 spec, the Character Set
  ## registry of IANA, which is referenced from HTML5 spec, says that
  ## charset name is case-insensitive.
  $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

  require Message::Charset::Info;
  my $charset = $Message::Charset::Info::IANACharset->{$charset_value};

  ## All errors raised here share the node and the (lowercased) value.
  my $report = sub {
    my ($type, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $charset_value,
                       level => $self->{level}->{$level_name});
  };

  ## ISSUE: What is "valid character encoding name"?  Syntactically
  ## valid?  Syntactically valid and registered?  What about x-charset
  ## names?
  $report->('charset:syntax error', 'must')
      unless Message::Charset::Info::is_syntactically_valid_iana_charset_name
          ($charset_value);

  if (not $charset) {
    ## TODO: non-preferred-name error for these cases.

    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    my $type = $charset_value =~ /^x-/
        ? 'charset:private' : 'charset:not registered';
    $report->($type, 'good');
    return ($charset, $charset_value);
  }

  ## ISSUE: What is "the preferred name for that encoding" (for a
  ## charset with no "preferred MIME name" label)?
  my $name_status = $charset->{iana_names}->{$charset_value} || 0;

  my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
  $report->('charset:not preferred', 'must')
      unless ($name_status & $preferred) == $preferred;

  my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();
  unless (($name_status & $registered) == $registered) {
    my $type = $charset_value =~ /^x-/
        ? 'charset:private' : 'charset:not registered';
    $report->($type, 'good');
  }

  if ($ascii_compat and
      not ($charset->{category} &
           Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ())) {
    $report->('charset:not ascii compat', 'must');
  }

  return ($charset, $charset_value);
}; # $HTMLCharsetChecker
1024    
## NOTE: "An ordered set of space-separated tokens" where "each token
## MUST be the preferred name of an ASCII-compatible character
## encoding".
##
## Every non-empty token is passed to |$HTMLCharsetChecker| with the
## ASCII-compatibility check turned on.
my $HTMLCharsetsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: "ordered set of space-separated tokens" is not defined.

  ## ISSUE: Uniqueness is not enforced.

  for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    ## A leading space character produces an empty first field.
    next unless length $token;
    $HTMLCharsetChecker->($token, $self, $attr, 1);
  }

  ## ISSUE: Shift_JIS is ASCII-compatible? What about ISO-2022-JP?
}; # $HTMLCharsetsAttrChecker
1043    
## Checker for HTML4 color attributes: "#" followed by one or more
## hexadecimal digits, or one of the sixteen color names, matched
## case-insensitively.
my $HTMLColorAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: HTML4 "color" or |%Color;|

  my $color = $attr->value;

  unless ($color =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i) {
    $self->{onerror}->(node => $attr, type => 'color:syntax error',
                       level => $self->{level}->{html4_fact});
  }

  ## TODO: HTML4 has some guideline on usage of color.
}; # $HTMLColorAttrChecker
1058    
## Checker shared by the |ref| and |template| attributes.
##
## After the generic URI check, the value is resolved against the
## document URI.  A reference into the document itself is either
## queued by fragment identifier in |$self->{ref}| or
## |$self->{template}| (the key is the attribute name), or, for a
## |template| attribute with no fragment identifier, checked against
## the document element.
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($self, $attr) = @_;
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $attr->name;

  ## |ref| is only allowed together with a |template| attribute.
  if ($attr_name eq 'ref' and
      not $attr->owner_element->has_attribute_ns (undef, 'template')) {
    $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                       level => $self->{level}->{must});
  }

  require Message::URL;
  my $doc = $attr->owner_document;
  my $doc_uri = $doc->document_uri;
  my $uri = Message::URL->new_abs ($attr->value, $doc_uri);
  my $no_frag_uri = $uri->clone;
  $no_frag_uri->uri_fragment (undef);

  ## The reference is to this document if the URL without its fragment
  ## equals the document URI (or is empty when there is no document
  ## URI).
  my $is_same_document = defined $doc_uri
      ? $doc_uri eq $no_frag_uri
      : $no_frag_uri eq '';

  if ($is_same_document) {
    my $fragid = $uri->uri_fragment;
    if (defined $fragid) {
      push @{$self->{$attr_name}}, [$fragid => $attr];
    } elsif ($attr_name eq 'template') {
      ## No fragment identifier: the document element itself has to be
      ## an HTML |datatemplate| element.
      my $docel = $doc->document_element;
      my $is_datatemplate = 0;
      if ($docel) {
        my $nsuri = $docel->namespace_uri;
        $is_datatemplate = 1
            if defined $nsuri and $nsuri eq $HTML_NS and
               $docel->manakai_local_name eq 'datatemplate';
      }
      unless ($is_datatemplate) {
        $self->{onerror}->(node => $attr, type => 'template:not template',
                           level => $self->{level}->{must});
      }
    }
  } else {
    ## TODO: An external document is referenced.
    ## The document MUST be an HTML or XML document.
    ## If there is a fragment identifier, it MUST point a part of the doc.
    ## If the attribute is |template|, the pointed part MUST be a
    ## |datatemplate| element.
    ## If no fragment identifier is specified, the root element MUST be
    ## a |datatemplate| element when the attribute is |template|.
  }
}; # $HTMLRefOrTemplateAttrChecker
1111    
## Checker for the |repeat-min|, |repeat-max|, and |repeat-start|
## attributes: the value has to be a non-negative integer.
##
## The checker is also registered for the same attributes in the HTML
## namespace, hence the test on the attribute's own namespace.
my $HTMLRepeatIndexAttrChecker = sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    ## A namespaced repetition attribute is not allowed on an element
    ## that is itself in the HTML namespace; such an element has to
    ## use the attribute in no namespace.
    ## NOTE: This test used to be |defined ... or ... eq ...|, which
    ## was true for any namespaced owner element and compared |undef|
    ## otherwise.
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  ## Any non-negative integer is accepted (the range callback always
  ## returns true).
  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1126    
## Checkers for the global attributes, keyed by attribute local name.
## Each checker is called with the checker object, the attribute node,
## the item being checked, and the element state.
##
## NOTE: The |repeat*| checkers are also registered for the same
## attributes in the HTML namespace, hence their tests on the
## attribute's own namespace.
my $HTMLAttrChecker = {
  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]
  id => sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    if (length $value > 0) {
      if ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID',
                           level => $self->{level}->{must});
        push @{$self->{id}->{$value}}, $attr;
      } else {
        ## First occurrence: remember the node and the ID type given
        ## by the element state (empty string if there is none).
        $self->{id}->{$value} = [$attr];
        $self->{id_type}->{$value} = $element_state->{id_type} || '';
      }
      if ($value =~ /[\x09\x0A\x0C\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID',
                           level => $self->{level}->{must});
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value',
                         level => $self->{level}->{must});
    }
  },
  title => sub {}, ## NOTE: No conformance criteria
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value eq '') {
      #
    } else {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        $self->{onerror}->(@_, node => $attr);
      }, $self->{level});
    }
    ## ISSUE: RFC 4646 (3066bis)?

    ## TODO: test data

    ## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
    ## non-conforming.  Such errors are detected by the checkers of
    ## |{}xml:lang| and |{xml}:lang| attributes.
  },
  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
  class => sub {
    my ($self, $attr) = @_;

    ## NOTE: "Unordered set of unique space-separated tokens".

    my %word;
    for my $word (grep {length $_}
                  split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      unless ($word{$word}) {
        $word{$word} = 1;
        ## Class names are collected in the result object.
        push @{$self->{return}->{class}->{$word}||=[]}, $attr;
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $word,
                           level => $self->{level}->{must});
      }
    }
  },
  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),
  contextmenu => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## Queued in |$self->{idref}|; not resolved here.
    push @{$self->{idref}}, ['menu', $value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },
  hidden => $GetHTMLBooleanAttrChecker->('hidden'),
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ref => $HTMLRefOrTemplateAttrChecker,
  registrationmark => sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## NOTE: Any value is conforming.

    if ($self->{flag}->{in_rule}) {
      ## Inside a rule: not allowed on HTML |nest| elements, nor on
      ## HTML |rule| elements whose state has no |in_rule_original|.
      my $el = $attr->owner_element;
      my $ln = $el->manakai_local_name;
      if ($ln eq 'nest' or
          ($ln eq 'rule' and not $element_state->{in_rule_original})) {
        my $nsuri = $el->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    } else {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  },
  repeat => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      ## A namespaced repetition attribute is not allowed on an
      ## element that is itself in the HTML namespace.
      ## NOTE: This test used to be |defined ... or ... eq ...|, which
      ## was true for any namespaced owner element.
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## The value is either the keyword |template| or an integer with
    ## an optional leading "-".
    my $value = $attr->value;
    if ($value eq 'template') {
      #
    } elsif ($value =~ /\A-?[0-9]+\z/) {
      #
    } else {
      $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "Repetition templates may occur anywhere."  Does that mean
    ## that the attribute MAY be specified to any element, or that the
    ## element with that attribute (i.e. a repetition template) can be
    ## inserted anywhere in a document tree?
  },
  'repeat-min' => $HTMLRepeatIndexAttrChecker,
  'repeat-max' => $HTMLRepeatIndexAttrChecker,
  'repeat-start' => $HTMLRepeatIndexAttrChecker,
  'repeat-template' => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      ## Same namespace rule as for |repeat|; the test used to be
      ## |defined ... or ... eq ...| here as well.
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## ISSUE: This attribute has no conformance requirement.
    ## ISSUE: Repetition blocks MAY have this attribute.  Then, is the
    ## attribute allowed on an element that is not a repetition block?
  },
  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
  style => sub {
    my ($self, $attr) = @_;

    ## The declarations are not parsed here; they are handed to the
    ## |onsubdoc| callback as a |text/x-css-inline| sub-document.
    $self->{onsubdoc}->({s => $attr->value,
                         container_node => $attr,
                         media_type => 'text/x-css-inline',
                         is_char_string => 1});

    ## NOTE: "... MUST still be comprehensible and usable if those
    ## attributes were removed" is a semantic requirement, it cannot
    ## be tested.
  },
  tabindex => $HTMLIntegerAttrChecker,
  template => $HTMLRefOrTemplateAttrChecker,
  'xml:lang' => sub {
    my ($self, $attr) = @_;

    if ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(type => 'in HTML:xml:lang',
                         level => $self->{level}->{info},
                         node => $attr);
      ## NOTE: This is not an error, but the attribute will be ignored.
    } else {
      $self->{onerror}->(type => 'in XML:xml:lang',
                         level => $self->{level}->{html5_no_may},
                         node => $attr);
      ## TODO: We need to add test for this error.
    }

    ## The attribute is only allowed next to a |lang| attribute with
    ## the same value (compared ASCII case-insensitively).
    my $lang_attr = $attr->owner_element->get_attribute_node_ns
        (undef, 'lang');
    if ($lang_attr) {
      my $lang_attr_value = $lang_attr->value;
      $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      if ($lang_attr_value ne $value) {
        $self->{onerror}->(type => 'xml:lang ne lang',
                           level => $self->{level}->{must},
                           node => $attr);
      }
    } else {
      $self->{onerror}->(type => 'xml:lang not allowed',
                         level => $self->{level}->{must},
                         node => $attr);
      ## TODO: We need to add test for <x {xml}:lang {}xml:lang>.
    }
  },
  xmlns => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                         level => $self->{level}->{must});
      ## TODO: Should be new "bad namespace" error?
    }
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                         level => $self->{level}->{must});
      ## TODO: Test
    }

    ## TODO: Should be resolved?
    ## The value is collected as a namespace URI in the result object.
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {namespace => 1}};
  },
};
1340    
## ISSUE: Shouldn't the same-origin policy be applied to the datatemplate feature?
1342    
## Status of the global attributes, keyed by attribute local name and
## grouped here by status value.
my %HTMLAttrStatus = (
  (map { ($_ => FEATURE_HTML5_WD) } qw/
    class contextmenu dir id lang style title xmlns
  /),
  (map { ($_ => FEATURE_HTML5_DEFAULT) } qw/
    contenteditable hidden tabindex
  /),
  (map { ($_ => FEATURE_HTML5_AT_RISK) } qw/
    ref registrationmark template
  /),
  (map { ($_ => FEATURE_WF2) } qw/
    repeat repeat-max repeat-min repeat-start repeat-template
  /),
  draggable => FEATURE_HTML5_LC,
  irrelevant => FEATURE_HTML5_DROPPED,
  role => 0,
);
1367    
## Status of the attributes listed for the M12N common attribute set,
## keyed by attribute local name and grouped here by status value.
my %HTMLM12NCommonAttrStatus = (
  (map { ($_ => FEATURE_RDFA_REC) } qw/
    about content datatype href property rel resource rev typeof
  /),
  instanceof => FEATURE_RDFA_LC_DROPPED,
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_M12N10_REC) } qw/
    class dir id title
  /),
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
    onclick ondblclick onmousedown onmouseup onmouseover onmousemove
    onmouseout onkeypress onkeydown onkeyup
  /),
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_M12N10_REC,
  style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
      FEATURE_M12N10_REC,
);
1398    
## Status of the XHTML2 common attributes, keyed by attribute local
## name.  |#xml:id|, |#xml:base|, and |#xml:lang| are not listed.
my %XHTML2CommonAttrStatus = (
  ## Attributes that are only part of XHTML2.  One line per attribute
  ## collection, in this order: Core, Hypertext, Edit, Embedding,
  ## Image Map, Media, Metadata, Role.
  (map { ($_ => FEATURE_XHTML2_ED) } qw/
    layout
    cite href hreflang hrefmedia hreftype nextfocus prevfocus target
    edit datetime
    encoding src srctype
    usemap ismap shape coords
    media
    about content datatype instanceof property rel resource rev
    role
  /),

  ## Attributes shared with HTML5: Core (|class|, |id|, |title|),
  ## Bi-directional (|dir|), and Style (|style|, "strongly
  ## discouraged").
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_XHTML2_ED) } qw/
    class id title
    dir
    style
  /),
);
1458    
## Union of |%HTMLM12NCommonAttrStatus| and |%XHTML2CommonAttrStatus|.
## The entries after the two included hashes replace the values taken
## from them.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  (map { ($_ => FEATURE_RDFA_REC | FEATURE_XHTML2_ED) } qw/
    about content datatype property rel resource rev
  /),
  ## NOTE(review): |href| and |typeof| carry no |FEATURE_XHTML2_ED|
  ## here, although |href| has it in |%XHTML2CommonAttrStatus| -
  ## confirm that this is intended.
  (map { ($_ => FEATURE_RDFA_REC) } qw/href typeof/),
  instanceof => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC) } qw/
    class dir id title
  /),
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
      FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
);
1482    
## Every event handler content attribute is a global attribute: all
## of them share one checker and the same status.
for my $event_attr (qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onstorage onsubmit onunload
/) {
  $HTMLAttrStatus{$event_attr} = FEATURE_HTML5_DEFAULT;
  $HTMLAttrChecker->{$event_attr} = $HTMLEventHandlerAttrChecker;
}
1494    
## NOTE: Non-standard global attributes in the HTML namespace.
##
## The entries under the empty local name are the fallbacks for
## attribute names with no entry of their own.
{
  my $html_ns_checker = $AttrChecker->{$HTML_NS} ||= {};
  my $html_ns_status = $AttrStatus->{$HTML_NS} ||= {};

  $html_ns_checker->{''} = sub {}; # no syntactical checks
  $html_ns_status->{''} = 0; # disallowed and not part of any standard

  $html_ns_status->{active} = FEATURE_HTML5_DROPPED;

  ## The repetition attributes reuse the checkers of the attributes in
  ## no namespace.
  for my $name (qw/repeat repeat-max repeat-min repeat-start repeat-template/) {
    $html_ns_checker->{$name} = $HTMLAttrChecker->{$name};
    $html_ns_status->{$name} = FEATURE_WF2;
  }

  $html_ns_status->{$_} = FEATURE_RDFA_REC | FEATURE_XHTML2_ED
      for qw/about content datatype property rel resource rev/;
  $html_ns_status->{instanceof} = FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED;
  $html_ns_status->{typeof} = FEATURE_RDFA_REC;
  $html_ns_status->{role} = FEATURE_ROLE_LC;

  $html_ns_status->{$_} = FEATURE_XHTML2_ED
      for qw/cite coords datetime edit encoding href hreflang hrefmedia hreftype
             ismap layout media nextfocus prevfocus shape src srctype style
             target usemap/;

  $html_ns_status->{$_} = FEATURE_M12N11_LC | FEATURE_XHTML2_ED
      for qw/class dir id title/;

  $html_ns_status->{$_} = FEATURE_M12N11_LC
      for qw/onclick ondblclick onmousedown onmouseup onmouseover onmousemove
             onmouseout onkeypress onkeydown onkeyup/;
}
1523    
## Checker and status for |data-*| attributes.  The checker does
## nothing.
##
## NOTE: "Authors should ... when the attributes are ignored and
## any associated CSS dropped, the page is still usable." (semantic
## constraint.)
my $HTMLDatasetAttrChecker = sub { }; # $HTMLDatasetAttrChecker

my $HTMLDatasetAttrStatus = FEATURE_HTML5_WD;
1531 wakaba 1.73
## Returns a |check_attrs| handler for an element type.
##
## Arguments: a hash reference of element-specific attribute checkers
## and a hash reference of element-specific attribute statuses, both
## keyed by attribute local name.
##
## The returned handler runs, for every attribute of the element, the
## first checker found in this order: |data-*| checker,
## element-specific checker, global attribute checker, namespace
## specific checker, namespace fallback checker.  It then reports the
## attribute's status through |_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;

  return sub {
    my ($self, $item, $element_state) = @_;

    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' if not defined $ns;
      my $ln = $attr->manakai_local_name;

      my ($checker, $status);
      if ($ns eq '') {
        my $is_dataset_attr = ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                               $ln !~ /[A-Z]/);
        if ($is_dataset_attr) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $element_specific_checker->{$ln}
              || $HTMLAttrChecker->{$ln};
          $status = $element_specific_status->{$ln};
        }
      }

      ## Fall back to the per-namespace tables; the empty local name
      ## is the namespace-wide default.
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($ns ne '' or $element_specific_status->{$ln}) {
        ## No checker, but the attribute is namespaced or has an
        ## element-specific status.
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in
        ## the spec
      }

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1573    
## Base checker for HTML elements: the generic |AnyChecker| callbacks
## with HTML-specific |check_start| and |check_attrs|.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,

  ## Marks the |template| and |ref| entries of |uri_info| as being of
  ## the |resource| type.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },

  ## No element-specific attribute checkers; statuses come from
  ## |%HTMLAttrStatus|.
  check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
);
1584    
## Checker for elements whose content model is empty: any child element
## that is not in |plus_elements|, and any significant text, is an error.
my %HTMLEmptyChecker = (
  %HTMLChecker,

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:empty';
    }
    return unless defined $error_type;

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must});
  },

  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{level}->{must});
  },
);
1612    
## Checker for elements whose content model is text only: child elements
## are errors unless they are listed in |plus_elements|.
my %HTMLTextChecker = (
  %HTMLChecker,

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Pick the error type first; |undef| means the child is acceptable.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:text';
    }
    return unless defined $error_type;

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must});
  },
);
1631    
## Checker for elements whose content model is flow content.
##
## |$element_state->{has_non_style}| records that something other than
## an accepted |style| child has been seen; a later |style| child is
## then reported.
my %HTMLFlowContentChecker = (
  %HTMLChecker,

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Explicitly excluded (interactive) element.
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }

    ## Explicitly allowed element.
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## |style| is accepted only with a |scoped| attribute and only
    ## before any non-|style| content.
    if ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      if ($element_state->{has_non_style} or
          not $child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow style',
                           level => $self->{level}->{must});
      }
      return;
    }

    ## Ordinary flow content; transparent children do not count as
    ## non-|style| content by themselves.
    if ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
      return;
    }

    ## Anything else is not allowed here.
    $element_state->{has_non_style} = 1;
    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:flow',
                       level => $self->{level}->{must});
  },

  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },

  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: A modified copy of the code below is in |datagrid| checker.

    ## Propagate "has significant content" to the real parent.
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
      return;
    }

    ## Transparent items are not reported here.
    return if $item->{transparent};

    $self->{onerror}->(node => $item->{node},
                       level => $self->{level}->{should},
                       type => 'no significant content');
  },
);
1680    
## Checker for elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Pick the error type first; |undef| means the child is acceptable.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed.
    } elsif (not $HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:phrasing';
    }
    return unless defined $error_type;

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must});
  },

  ## The end-of-element check is shared with the flow content checker.
  check_end => $HTMLFlowContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between flow and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
1706    
## Transparent elements are checked with a copy of the flow content
## checker.
my %HTMLTransparentChecker = (%HTMLFlowContentChecker);
## ISSUE: Should the significant content rule be applied to a
## transparent element together with its parent?

## Element definition tables (package variables).
our ($Element, $ElementDefault);

## Default definition for elements in the HTML namespace that have no
## entry of their own.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
1717    
## Definition of the |html| element.
##
## The content model is tracked in |$element_state->{phase}|, which
## moves through 'before head' -> 'after head' -> 'after body'.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  is_root => 1,

  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    version => sub {
      ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
      ## Though DTDs of various versions of HTML define the attribute
      ## as |#FIXED|, this conformance checker does no check for
      ## the attribute value, since what kind of check should be done
      ## is unknown.
    },
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    manifest => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),

  ## Initializes the phase and marks the |manifest|, |template| and
  ## |ref| entries of |uri_info| as being of the |resource| type.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';

    $element_state->{uri_info}{manifest}{type}{resource} = 1;
    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## Accepted children return early; everything that falls out of
    ## this phase dispatch is reported as 'element not allowed' below.
    my $phase = $element_state->{phase};
    if ($phase eq 'before head') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'head') {
        $element_state->{phase} = 'after head';
        return;
      }
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
        ## |body| without a preceding |head|.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing',
                           text => 'head',
                           level => $self->{level}->{must});
        $element_state->{phase} = 'after body';
        return;
      }
    } elsif ($phase eq 'after head') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
        $element_state->{phase} = 'after body';
        return;
      }
    } elsif ($phase ne 'after body') {
      die "check_child_element: Bad |html| phase: $element_state->{phase}";
    }

    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed',
                       level => $self->{level}->{must});
  },

  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must});
  },

  ## Reports the required children that have not been seen, then runs
  ## the generic end-of-element check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Required children still missing when the element ends in each
    ## phase.
    my %missing_children_for = (
      'after body' => [],
      'before head' => [qw/head body/],
      'after head' => [qw/body/],
    );
    my $missing = $missing_children_for{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";

    foreach my $child_name (@$missing) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => $child_name,
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1822 wakaba 1.25
## Definition of the |head| element.
##
## Content model: metadata content, with at most one |title| (and at
## least one, checked in |check_end|) and no |style| element with a
## |scoped| attribute.
##
## While the children of |head| are being processed,
## |$self->{flag}->{in_head}| is set to true; the value the flag had
## before is kept in |$element_state->{in_head_original}| and put back
## in |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is allowed.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## A |style| with a |scoped| attribute is not allowed in |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{level}->{must});
    }

    ## Save the previous value of the |in_head| flag only once.  This
    ## callback runs for every child; saving unconditionally would, from
    ## the second child on, overwrite the saved value with the 1 set
    ## below, and |check_end| would then leave the flag set after the
    ## |head| element has ended.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'title',
                         level => $self->{level}->{must});
    }

    ## Restore the flag only if |check_child_element| changed it; a
    ## |head| without child elements never touched the flag.
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1897    
## Definition of the |title| element: text-only content, global
## attributes only.
$Element->{$HTML_NS}->{title} = do {
  ## Status of the attributes; the explicit entries override those of
  ## the two shared tables.
  my %title_attr_status = (
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  +{
    %HTMLTextChecker,
    status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%title_attr_status),
  };
};
1911 wakaba 1.1
## Definition of the |base| element: empty content model; the
## document-wide constraints are checked in |check_attrs|.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,

  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    ## Only one |base| element is allowed; |$self->{has_base}| records
    ## that one has been seen.
    if ($self->{has_base}) {
      $self->{onerror}->(node => $base_el,
                         type => 'element not allowed:base',
                         level => $self->{level}->{must});
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    ## |href| after an attribute with URL has already been seen.
    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $self->{onerror}->(node => $base_el,
                         type => 'basehref after URL attribute',
                         level => $self->{level}->{must});
    }

    ## |target| after a hyperlink element has already been seen.
    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $self->{onerror}->(node => $base_el,
                         type => 'basetarget after hyperlink',
                         level => $self->{level}->{must});
    }

    ## At least one of |href| and |target| is required.
    unless ($has_href or $has_target) {
      $self->{onerror}->(node => $base_el,
                         type => 'attribute missing:href|target',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{href}->{type}->{base} = 1;

    ## Run the ordinary per-attribute checks and return their result.
    my $check_each_attr = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    });
    return $check_each_attr->($self, $item, $element_state);
  },
};
1976    
## Definition of the |link| element: empty content model; attribute
## checks plus the cross-attribute constraints (|href| and |rel|
## required, |sizes| only with the |icon| link type, |title| required
## for alternate style sheets).
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,

  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $link_el = $item->{node};

    ## Set by the |sizes| checker below when the attribute is present.
    my $sizes_attr;

    my %checker_for = (
      charset => sub {
        my $attr = $_[1];
        $HTMLCharsetChecker->($attr->value, @_);
      },
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      sizes => sub {
        my ($self, $attr) = @_;
        $sizes_attr = $attr;

        ## Each space-separated token must be unique and must be either
        ## |any| or of the form WIDTHxHEIGHT (no leading zeros).
        my %seen;
        foreach my $token (grep {length $_}
                           split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
          if ($seen{$token}++) {
            $self->{onerror}->(node => $attr, type => 'duplicate token',
                               value => $token,
                               level => $self->{level}->{must});
            next;
          }
          next if $token eq 'any';
          next if $token =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/;

          $self->{onerror}->(node => $attr,
                             type => 'sizes:syntax error',
                             value => $token,
                             level => $self->{level}->{must});
        }
      },
      target => $HTMLTargetAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    );

    my %status_for = (
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
          ## NOTE: |charset| attribute had been part of HTML5 spec though
          ## it had been commented out.
      href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      methods => FEATURE_HTML20_RFC,
      rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      sizes => FEATURE_HTML5_LC,
      target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      # title: HTML5_WD | HTML5_LC | ...
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    );

    $GetHTMLAttrsChecker->(\%checker_for, \%status_for)
        ->($self, $item, $element_state);

    ## |href| is required.  A |link| with |href| and a hyperlink link
    ## type is recorded in |$self->{has_hyperlink_element}|.
    if ($link_el->has_attribute_ns (undef, 'href')) {
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    } else {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => 'href',
                         level => $self->{level}->{must});
    }

    ## |rel| is required.
    if (not $link_el->has_attribute_ns (undef, 'rel')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => 'rel',
                         level => $self->{level}->{must});
    }

    ## |sizes| is reported unless the |icon| link type was recorded in
    ## |$element_state->{link_rel}|.
    if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
      $self->{onerror}->(node => $sizes_attr,
                         type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }

    ## An alternate style sheet needs a non-empty |title|.
    if ($element_state->{link_rel}->{alternate} and
        $element_state->{link_rel}->{stylesheet}) {
      my $title_attr = $link_el->get_attribute_node_ns (undef, 'title');
      if (not $title_attr) {
        $self->{onerror}->(node => $link_el,
                           type => 'attribute missing',
                           text => 'title',
                           level => $self->{level}->{must});
      } elsif ($title_attr->value eq '') {
        $self->{onerror}->(node => $title_attr,
                           type => 'empty style sheet title',
                           level => $self->{level}->{must});
      }
    }
  },
};
2081    
2082     $Element->{$HTML_NS}->{meta} = {
2083 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2084 wakaba 1.40 %HTMLEmptyChecker,
2085     check_attrs => sub {
2086     my ($self, $item, $element_state) = @_;
2087 wakaba 1.1 my $name_attr;
2088     my $http_equiv_attr;
2089     my $charset_attr;
2090     my $content_attr;
2091 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2092 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2093     $attr_ns = '' unless defined $attr_ns;
2094     my $attr_ln = $attr->manakai_local_name;
2095     my $checker;
2096 wakaba 1.73 my $status;
2097 wakaba 1.1 if ($attr_ns eq '') {
2098 wakaba 1.73 $status = {
2099     %HTMLAttrStatus,
2100 wakaba 1.82 %XHTML2CommonAttrStatus,
2101 wakaba 1.153 charset => FEATURE_HTML5_WD,
2102     content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2103     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2104     'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2105     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
2106     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2107     name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2108 wakaba 1.73 scheme => FEATURE_M12N10_REC,
2109     }->{$attr_ln};
2110    
2111 wakaba 1.1 if ($attr_ln eq 'content') {
2112     $content_attr = $attr;
2113     $checker = 1;
2114     } elsif ($attr_ln eq 'name') {
2115     $name_attr = $attr;
2116     $checker = 1;
2117     } elsif ($attr_ln eq 'http-equiv') {
2118     $http_equiv_attr = $attr;
2119     $checker = 1;
2120     } elsif ($attr_ln eq 'charset') {
2121     $charset_attr = $attr;
2122     $checker = 1;
2123 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
2124 wakaba 1.71 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2125 wakaba 1.67 $checker = sub {};
2126 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2127     $attr_ln !~ /[A-Z]/) {
2128 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
2129     $status = $HTMLDatasetAttrStatus;
2130 wakaba 1.1 } else {
2131     $checker = $HTMLAttrChecker->{$attr_ln}
2132 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
2133 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
2134     }
2135     } else {
2136     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2137 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
2138     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2139     || $AttrStatus->{$attr_ns}->{''};
2140     $status = FEATURE_ALLOWED if not defined $status;
2141 wakaba 1.1 }
2142 wakaba 1.62
2143 wakaba 1.1 if ($checker) {
2144 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
2145 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
2146 wakaba 1.54 #
2147 wakaba 1.1 } else {
2148 wakaba 1.104 $self->{onerror}->(node => $attr,
2149     type => 'unknown attribute',
2150     level => $self->{level}->{uncertain});
2151 wakaba 1.49 ## ISSUE: No conformance createria for unknown attributes in the spec
2152     }
2153    
2154 wakaba 1.82 $self->_attr_status_info ($attr, $status);
2155 wakaba 1.1 }
2156    
2157     if (defined $name_attr) {
2158     if (defined $http_equiv_attr) {
2159     $self->{onerror}->(node => $http_equiv_attr,
2160 wakaba 1.104 type => 'attribute not allowed',
2161     level => $self->{level}->{must});
2162 wakaba 1.1 } elsif (defined $charset_attr) {
2163     $self->{onerror}->(node => $charset_attr,
2164 wakaba 1.104 type => 'attribute not allowed',
2165     level => $self->{level}->{must});
2166 wakaba 1.1 }
2167     my $metadata_name = $name_attr->value;
2168     my $metadata_value;
2169     if (defined $content_attr) {
2170     $metadata_value = $content_attr->value;
2171     } else {
2172 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2173 wakaba 1.104 type => 'attribute missing',
2174     text => 'content',
2175     level => $self->{level}->{must});
2176 wakaba 1.1 $metadata_value = '';
2177     }
2178     } elsif (defined $http_equiv_attr) {
2179     if (defined $charset_attr) {
2180     $self->{onerror}->(node => $charset_attr,
2181 wakaba 1.104 type => 'attribute not allowed',
2182     level => $self->{level}->{must});
2183 wakaba 1.1 }
2184     unless (defined $content_attr) {
2185 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2186 wakaba 1.104 type => 'attribute missing',
2187     text => 'content',
2188     level => $self->{level}->{must});
2189 wakaba 1.1 }
2190     } elsif (defined $charset_attr) {
2191     if (defined $content_attr) {
2192     $self->{onerror}->(node => $content_attr,
2193 wakaba 1.104 type => 'attribute not allowed',
2194     level => $self->{level}->{must});
2195 wakaba 1.1 }
2196     } else {
2197     if (defined $content_attr) {
2198     $self->{onerror}->(node => $content_attr,
2199 wakaba 1.104 type => 'attribute not allowed',
2200     level => $self->{level}->{must});
2201 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2202 wakaba 1.104 type => 'attribute missing:name|http-equiv',
2203     level => $self->{level}->{must});
2204 wakaba 1.1 } else {
2205 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2206 wakaba 1.104 type => 'attribute missing:name|http-equiv|charset',
2207     level => $self->{level}->{must});
2208 wakaba 1.1 }
2209     }
2210    
2211 wakaba 1.32 my $check_charset_decl = sub () {
2212 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
2213 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2214     for my $el (@{$parent->child_nodes}) {
2215     next unless $el->node_type == 1; # ELEMENT_NODE
2216 wakaba 1.40 unless ($el eq $item->{node}) {
2217 wakaba 1.29 ## NOTE: Not the first child element.
2218 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2219 wakaba 1.32 type => 'element not allowed:meta charset',
2220 wakaba 1.104 level => $self->{level}->{must});
2221 wakaba 1.29 }
2222     last;
2223     ## NOTE: Entity references are not supported.
2224     }
2225     } else {
2226 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2227 wakaba 1.32 type => 'element not allowed:meta charset',
2228 wakaba 1.104 level => $self->{level}->{must});
2229 wakaba 1.29 }
2230    
2231 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
2232     $self->{onerror}->(node => $item->{node},
2233 wakaba 1.32 type => 'in XML:charset',
2234 wakaba 1.104 level => $self->{level}->{must});
2235 wakaba 1.1 }
2236 wakaba 1.32 }; # $check_charset_decl
2237 wakaba 1.21
2238 wakaba 1.32 my $check_charset = sub ($$) {
2239     my ($attr, $charset_value) = @_;
2240 wakaba 1.21
2241 wakaba 1.91 my $charset;
2242     ($charset, $charset_value)
2243     = $HTMLCharsetChecker->($charset_value, $self, $attr);
2244    
2245 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
2246 wakaba 1.21 if (defined $ic) {
2247     ## TODO: Test for this case
2248     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2249     if ($charset ne $ic_charset) {
2250 wakaba 1.32 $self->{onerror}->(node => $attr,
2251 wakaba 1.104 type => 'mismatched charset name',
2252 wakaba 1.106 text => $ic,
2253 wakaba 1.104 value => $charset_value,
2254     level => $self->{level}->{must});
2255 wakaba 1.21 }
2256     } else {
2257     ## NOTE: MUST, but not checkable, since the document is not originally
2258     ## in serialized form (or the parser does not preserve the input
2259     ## encoding information).
2260 wakaba 1.32 $self->{onerror}->(node => $attr,
2261 wakaba 1.104 type => 'mismatched charset name not checked',
2262     value => $charset_value,
2263     level => $self->{level}->{uncertain});
2264 wakaba 1.21 }
2265    
2266 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
2267     $self->{onerror}->(node => $attr,
2268 wakaba 1.104 type => 'charref in charset',
2269     level => $self->{level}->{must},
2270     layer => 'syntax');
2271 wakaba 1.22 }
2272 wakaba 1.32 }; # $check_charset
2273    
2274     ## TODO: metadata conformance
2275    
2276     ## TODO: pragma conformance
2277     if (defined $http_equiv_attr) { ## An enumerated attribute
2278     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
2279 wakaba 1.33
2280 wakaba 1.85 if ($self->{has_http_equiv}->{$keyword}) {
2281     $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2282     node => $http_equiv_attr,
2283 wakaba 1.104 level => $self->{level}->{must});
2284 wakaba 1.85 } else {
2285     $self->{has_http_equiv}->{$keyword} = 1;
2286     }
2287    
2288     if ($keyword eq 'content-type') {
2289 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2290 wakaba 1.33
2291 wakaba 1.32 $check_charset_decl->();
2292     if ($content_attr) {
2293     my $content = $content_attr->value;
2294 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2295 wakaba 1.132 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2296 wakaba 1.58 =(.+)\z!sx) {
2297 wakaba 1.32 $check_charset->($content_attr, $1);
2298     } else {
2299     $self->{onerror}->(node => $content_attr,
2300     type => 'meta content-type syntax error',
2301 wakaba 1.104 level => $self->{level}->{must});
2302 wakaba 1.85 }
2303     }
2304     } elsif ($keyword eq 'default-style') {
2305     ## ISSUE: Not defined yet in the spec.
2306     } elsif ($keyword eq 'refresh') {
2307     if ($content_attr) {
2308     my $content = $content_attr->value;
2309     if ($content =~ /\A[0-9]+\z/) {
2310     ## NOTE: Valid non-negative integer.
2311     #
2312 wakaba 1.132 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2313 wakaba 1.85 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
2314     Whatpm::URIChecker->check_iri_reference ($content, sub {
2315 wakaba 1.104 $self->{onerror}->(value => $content, @_, node => $content_attr);
2316 wakaba 1.106 }, $self->{level});
2317 wakaba 1.85 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URIs".
2318    
2319     $element_state->{uri_info}->{content}->{node} = $content_attr;
2320     $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2321     ## TODO: absolute
2322     push @{$self->{return}->{uri}->{$content} ||= []},
2323     $element_state->{uri_info}->{content};
2324     } else {
2325     $self->{onerror}->(node => $content_attr,
2326     type => 'refresh:syntax error',
2327 wakaba 1.104 level => $self->{level}->{must});
2328 wakaba 1.32 }
2329     }
2330     } else {
2331     $self->{onerror}->(node => $http_equiv_attr,
2332 wakaba 1.104 type => 'enumerated:invalid',
2333     level => $self->{level}->{must});
2334 wakaba 1.32 }
2335     }
2336    
2337     if (defined $charset_attr) {
2338     $check_charset_decl->();
2339     $check_charset->($charset_attr, $charset_attr->value);
2340 wakaba 1.1 }
2341     },
2342     };
2343    
## Checker definition for the HTML |style| element.
##  - check_attrs: syntax checkers for |type|, |media| and |scoped|, plus the
##    per-attribute feature status table.
##  - check_start: normalizes |type| and decides whether child elements are
##    tolerated ($element_state->{allow_element}).
##  - check_child_text: accumulates character data in $element_state->{text}.
##  - check_end: hands the accumulated text to |onsubdoc| unless the type is
##    invalid or ends in "+xml" / "/xml".
2344     $Element->{$HTML_NS}->{style} = {
2345 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2346 wakaba 1.40 %HTMLChecker,
2347     check_attrs => $GetHTMLAttrsChecker->({
2348 wakaba 1.1 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
2349     media => $HTMLMQAttrChecker,
2350     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
2351     ## NOTE: |title| has special semantics for |style|s, but is syntactically
2352     ## not different
2353 wakaba 1.49 }, {
2354     %HTMLAttrStatus,
2355 wakaba 1.82 %XHTML2CommonAttrStatus,
2356 wakaba 1.153 dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2357 wakaba 1.82 disabled => FEATURE_XHTML2_ED,
2358 wakaba 1.154 href => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
2359 wakaba 1.153 id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
2360     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2361     media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2362     scoped => FEATURE_HTML5_FD,
2363     title => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2364     type => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2365 wakaba 1.1 }),
2366 wakaba 1.40 check_start => sub {
2367     my ($self, $item, $element_state) = @_;
2368    
2369 wakaba 1.27 ## NOTE: |html:style| itself has no conformance criteria on content model.
2370 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
## A missing |type| attribute is treated as "text/css".
2371 wakaba 1.93 $type = 'text/css' unless defined $type;
## Accept only a bare "type/subtype" (optionally surrounded by linear
## whitespace, no parameters); anything else leaves $type undefined below.
2372     if ($type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
2373     $type = "$1/$2";
2374     $type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
2375     } else {
2376     ## NOTE: We don't know how parameters are handled by UAs. According to
2377     ## HTML5 specification, <style> with unknown parameters in |type=""|
2378     ## must be ignored.
2379     undef $type;
2380     }
## Child elements are reported by check_child_element only when the type
## is exactly text/css; invalid and unknown types tolerate them.
2381     if (not defined $type) {
2382     $element_state->{allow_element} = 1; # invalid type=""
2383     } elsif ($type eq 'text/css') {
2384 wakaba 1.40 $element_state->{allow_element} = 0;
2385 wakaba 1.93 #} elsif ($type =~ m![/+][Xx][Mm][Ll]\z!) {
2386     # ## NOTE: There is no definition for "XML-based styling language" in HTML5
2387     # $element_state->{allow_element} = 1;
2388 wakaba 1.40 } else {
2389     $element_state->{allow_element} = 1; # unknown
2390     }
2391 wakaba 1.93 $element_state->{style_type} = $type;
2392 wakaba 1.79
2393     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2394     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2395 wakaba 1.107
## Text buffer appended to by check_child_text and read by check_end.
2396     $element_state->{text} = '';
2397 wakaba 1.40 },
2398     check_child_element => sub {
2399     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2400     $child_is_transparent, $element_state) = @_;
2401 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2402     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2403 wakaba 1.40 $self->{onerror}->(node => $child_el,
2404     type => 'element not allowed:minus',
2405 wakaba 1.104 level => $self->{level}->{must});
2406 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2407     #
2408     } elsif ($element_state->{allow_element}) {
2409     #
2410     } else {
2411 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2412     level => $self->{level}->{must});
2413 wakaba 1.40 }
2414     },
2415     check_child_text => sub {
2416     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2417 wakaba 1.115 $element_state->{text} .= $child_node->data;
2418 wakaba 1.40 },
2419     check_end => sub {
2420     my ($self, $item, $element_state) = @_;
2421 wakaba 1.93 if (not defined $element_state->{style_type}) {
2422     ## NOTE: Invalid type=""
2423     #
2424     } elsif ($element_state->{style_type} eq 'text/css') {
2425 wakaba 1.40 $self->{onsubdoc}->({s => $element_state->{text},
2426     container_node => $item->{node},
2427 wakaba 1.28 media_type => 'text/css', is_char_string => 1});
2428 wakaba 1.93 } elsif ($element_state->{style_type} =~ m![+/][Xx][Mm][Ll]\z!) {
2429     ## NOTE: XML content should be checked by THIS instance of checker
2430     ## as part of normal tree validation. However, we don't know of any
2431     ## XML-based styling language that can be used in HTML <style> element,
2432     ## such that we throw a "style language not supported" error.
2433 wakaba 1.104 $self->{onerror}->(node => $item->{node},
2434     type => 'XML style lang',
2435     text => $element_state->{style_type},
2436     level => $self->{level}->{uncertain});
2437 wakaba 1.93 } else {
2438     ## NOTE: Should we raise some kind of error for,
2439     ## say, <style type="text/plaion">?
2440     $self->{onsubdoc}->({s => $element_state->{text},
2441     container_node => $item->{node},
2442     media_type => $element_state->{style_type},
2443     is_char_string => 1});
2444 wakaba 1.27 }
2445 wakaba 1.40
2446     $HTMLChecker{check_end}->(@_);
2447 wakaba 1.1 },
2448     };
2449 wakaba 1.25 ## ISSUE: Relationship to significant content check?
2450 wakaba 1.1
## Checker definition for the HTML |body| element.
## Spreads %HTMLFlowContentChecker, then adds syntax checkers for the
## presentational color/background attributes (all flagged as deprecated in
## the status table) and marks |background| as an embedded URI at start.
2451     $Element->{$HTML_NS}->{body} = {
2452 wakaba 1.72 %HTMLFlowContentChecker,
2453 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2454 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2455     alink => $HTMLColorAttrChecker,
2456     background => $HTMLURIAttrChecker,
2457     bgcolor => $HTMLColorAttrChecker,
2458     link => $HTMLColorAttrChecker,
2459     text => $HTMLColorAttrChecker,
2460     vlink => $HTMLColorAttrChecker,
2461     }, {
2462 wakaba 1.49 %HTMLAttrStatus,
2463 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2464 wakaba 1.49 alink => FEATURE_M12N10_REC_DEPRECATED,
2465     background => FEATURE_M12N10_REC_DEPRECATED,
2466     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
2467 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2468 wakaba 1.49 link => FEATURE_M12N10_REC_DEPRECATED,
2469 wakaba 1.50 onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2470     onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2471 wakaba 1.49 text => FEATURE_M12N10_REC_DEPRECATED,
2472     vlink => FEATURE_M12N10_REC_DEPRECATED,
2473     }),
2474 wakaba 1.68 check_start => sub {
2475     my ($self, $item, $element_state) = @_;
2476    
## Classify the URI-bearing attributes for the uri_info bookkeeping.
2477     $element_state->{uri_info}->{background}->{type}->{embedded} = 1;
2478 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2479     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2480 wakaba 1.68 },
2481 wakaba 1.1 };
2482    
## Checker definition for the HTML |section| element: spreads
## %HTMLFlowContentChecker and defines no element-specific attribute
## checkers, only the common attribute status tables.
2483     $Element->{$HTML_NS}->{section} = {
2484 wakaba 1.72 %HTMLFlowContentChecker,
2485 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
2486 wakaba 1.82 check_attrs => $GetHTMLAttrsChecker->({
2487     }, {
2488     %HTMLAttrStatus,
2489     %XHTML2CommonAttrStatus,
2490     }),
2491 wakaba 1.1 };
2492    
## Checker definition for the HTML |nav| element: everything comes from
## %HTMLFlowContentChecker; only the feature status is set here.
2493     $Element->{$HTML_NS}->{nav} = {
2494 wakaba 1.153 status => FEATURE_HTML5_LC,
2495 wakaba 1.72 %HTMLFlowContentChecker,
2496 wakaba 1.1 };
2497    
## Checker definition for the HTML |article| element: everything comes from
## %HTMLFlowContentChecker; only the feature status is set here.
2498     $Element->{$HTML_NS}->{article} = {
2499 wakaba 1.153 status => FEATURE_HTML5_LC,
2500 wakaba 1.72 %HTMLFlowContentChecker,
2501 wakaba 1.1 };
2502    
## Checker definition for the HTML |blockquote| element: spreads
## %HTMLFlowContentChecker, checks |cite| with the URI attribute checker and
## marks that attribute as a citation URI at start.
2503     $Element->{$HTML_NS}->{blockquote} = {
2504 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2505 wakaba 1.72 %HTMLFlowContentChecker,
2506 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2507 wakaba 1.1 cite => $HTMLURIAttrChecker,
2508 wakaba 1.49 }, {
2509     %HTMLAttrStatus,
2510 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2511 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2512 wakaba 1.154 cite => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2513 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2514 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2515 wakaba 1.1 }),
2516 wakaba 1.66 check_start => sub {
2517     my ($self, $item, $element_state) = @_;
2518    
2519     $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
2520 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2521     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2522 wakaba 1.66 },
2523 wakaba 1.1 };
2524    
## Checker definition for the HTML |aside| element: everything comes from
## %HTMLFlowContentChecker; only the feature status is set here.
2525     $Element->{$HTML_NS}->{aside} = {
2526 wakaba 1.153 status => FEATURE_HTML5_LC,
2527 wakaba 1.72 %HTMLFlowContentChecker,
2528 wakaba 1.1 };
2529    
## Checker definition for the HTML |h1| element (also copied for h2..h6).
## Spreads %HTMLPhrasingContentChecker, validates the deprecated |align|
## attribute as an enumerated value, and raises the checker-wide
## $self->{flag}->{has_hn} flag that the |header| checker inspects.
2530     $Element->{$HTML_NS}->{h1} = {
2531 wakaba 1.40 %HTMLPhrasingContentChecker,
2532 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2533 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2534     align => $GetHTMLEnumeratedAttrChecker->({
2535     left => 1, center => 1, right => 1, justify => 1,
2536     }),
2537     }, {
2538 wakaba 1.49 %HTMLAttrStatus,
2539 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2540 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
2541 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2542 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2543 wakaba 1.49 }),
2544 wakaba 1.40 check_start => sub {
2545     my ($self, $item, $element_state) = @_;
## Record that a heading element has been seen.
2546     $self->{flag}->{has_hn} = 1;
2547 wakaba 1.79
2548     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2549     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2550 wakaba 1.1 },
2551     };
2552    
## h2..h6 are shallow copies of the |h1| definition: each gets its own
## top-level hash, but the checker code references and attribute tables
## inside it are shared with |h1|.
2553 wakaba 1.40 $Element->{$HTML_NS}->{h2} = {%{$Element->{$HTML_NS}->{h1}}};
2554 wakaba 1.1
2555 wakaba 1.40 $Element->{$HTML_NS}->{h3} = {%{$Element->{$HTML_NS}->{h1}}};
2556 wakaba 1.1
2557 wakaba 1.40 $Element->{$HTML_NS}->{h4} = {%{$Element->{$HTML_NS}->{h1}}};
2558 wakaba 1.1
2559 wakaba 1.40 $Element->{$HTML_NS}->{h5} = {%{$Element->{$HTML_NS}->{h1}}};
2560 wakaba 1.1
2561 wakaba 1.40 $Element->{$HTML_NS}->{h6} = {%{$Element->{$HTML_NS}->{h1}}};
2562 wakaba 1.1
2563 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
2564    
## Checker definition for the HTML |header| element.
## While the element is open, |header|, |footer| and the elements listed in
## $HTMLSectioningContent are added to the "minus" (excluded) set. At the
## end, a 'element missing:hn' error is reported unless a heading element
## raised $self->{flag}->{has_hn} inside this header.
2565 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
2566 wakaba 1.153 status => FEATURE_HTML5_LC,
2567 wakaba 1.72 %HTMLFlowContentChecker,
2568 wakaba 1.40 check_start => sub {
2569     my ($self, $item, $element_state) = @_;
2570     $self->_add_minus_elements ($element_state,
2571     {$HTML_NS => {qw/header 1 footer 1/}},
2572 wakaba 1.58 $HTMLSectioningContent);
## Save the outer heading flag and reset it, so that check_end only sees
## headings that occurred inside this |header|.
2573 wakaba 1.40 $element_state->{has_hn_original} = $self->{flag}->{has_hn};
2574     $self->{flag}->{has_hn} = 0;
2575 wakaba 1.79
2576     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2577     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2578 wakaba 1.40 },
2579     check_end => sub {
2580     my ($self, $item, $element_state) = @_;
2581     $self->_remove_minus_elements ($element_state);
2582     unless ($self->{flag}->{has_hn}) {
2583     $self->{onerror}->(node => $item->{node},
2584 wakaba 1.104 type => 'element missing:hn',
2585     level => $self->{level}->{must});
2586 wakaba 1.40 }
## Merge the saved outer flag back in: the flag stays set if a heading was
## seen either before or inside this |header|.
2587     $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};
2588 wakaba 1.1
2589 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
2590 wakaba 1.1 },
2591 wakaba 1.40 ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
2592 wakaba 1.1 };
2593    
## Checker definition for the HTML |footer| element.
## While the element is open, |footer| and the elements listed in
## $HTMLSectioningContent and $HTMLHeadingContent are added to the "minus"
## (excluded) set; the set is restored in check_end before delegating to
## the flow-content check_end.
2594     $Element->{$HTML_NS}->{footer} = {
2595 wakaba 1.153 status => FEATURE_HTML5_LC,
2596 wakaba 1.72 %HTMLFlowContentChecker,
2597 wakaba 1.40 check_start => sub {
2598     my ($self, $item, $element_state) = @_;
2599     $self->_add_minus_elements ($element_state,
2600     {$HTML_NS => {footer => 1}},
2601 wakaba 1.58 $HTMLSectioningContent,
2602 wakaba 1.57 $HTMLHeadingContent);
2603 wakaba 1.79
2604     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2605     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2606 wakaba 1.40 },
2607     check_end => sub {
2608     my ($self, $item, $element_state) = @_;
2609     $self->_remove_minus_elements ($element_state);
2610 wakaba 1.1
2611 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
2612 wakaba 1.1 },
2613     };
2614    
## Checker definition for the HTML |address| element.
## While the element is open, |footer|, |address| and the elements listed in
## $HTMLSectioningContent and $HTMLHeadingContent are added to the "minus"
## (excluded) set. No element-specific attribute checker is active: the
## |align| checker is commented out pending a test.
2615     $Element->{$HTML_NS}->{address} = {
2616 wakaba 1.72 %HTMLFlowContentChecker,
2617 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2618 wakaba 1.110 check_attrs => $GetHTMLAttrsChecker->({
2619     ## TODO: add test
2620     #align => $GetHTMLEnumeratedAttrChecker->({
2621     # left => 1, center => 1, right => 1, justify => 1,
2622     #}),
2623     }, {
2624 wakaba 1.49 %HTMLAttrStatus,
2625 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2626 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2627 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2628 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2629     sdapref => FEATURE_HTML20_RFC,
2630 wakaba 1.49 }),
2631 wakaba 1.40 check_start => sub {
2632     my ($self, $item, $element_state) = @_;
2633     $self->_add_minus_elements ($element_state,
2634     {$HTML_NS => {footer => 1, address => 1}},
2635     $HTMLSectioningContent, $HTMLHeadingContent);
2636 wakaba 1.79
2637     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2638     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2639 wakaba 1.40 },
2640     check_end => sub {
2641     my ($self, $item, $element_state) = @_;
2642     $self->_remove_minus_elements ($element_state);
2643 wakaba 1.29
2644 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
2645 wakaba 1.29 },
2646 wakaba 1.1 };
2647    
## Checker definition for the HTML |p| element: spreads
## %HTMLPhrasingContentChecker and validates the deprecated |align|
## attribute as one of left/center/right/justify.
2648     $Element->{$HTML_NS}->{p} = {
2649 wakaba 1.40 %HTMLPhrasingContentChecker,
2650 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2651 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2652     align => $GetHTMLEnumeratedAttrChecker->({
2653     left => 1, center => 1, right => 1, justify => 1,
2654     }),
2655     }, {
2656 wakaba 1.49 %HTMLAttrStatus,
2657 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2658 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
2659 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2660 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2661 wakaba 1.49 }),
2662 wakaba 1.1 };
2663    
## Checker definition for the HTML |hr| element: spreads %HTMLEmptyChecker.
## The HTML4 presentational attributes only have status entries (all
## deprecated); their value checkers are still TODO.
2664     $Element->{$HTML_NS}->{hr} = {
2665 wakaba 1.40 %HTMLEmptyChecker,
2666 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
2667 wakaba 1.70 check_attrs => $GetHTMLAttrsChecker->({
2668     ## TODO: HTML4 |align|, |noshade|, |size|, |width|
2669     }, {
2670 wakaba 1.49 %HTMLAttrStatus,
2671     %HTMLM12NCommonAttrStatus,
2672     align => FEATURE_M12N10_REC_DEPRECATED,
2673 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2674 wakaba 1.49 noshade => FEATURE_M12N10_REC_DEPRECATED,
2675 wakaba 1.61 sdapref => FEATURE_HTML20_RFC,
2676 wakaba 1.49 size => FEATURE_M12N10_REC_DEPRECATED,
2677     width => FEATURE_M12N10_REC_DEPRECATED,
2678     }),
2679 wakaba 1.1 };
2680    
## Checker definition for the HTML |br| element: spreads %HTMLEmptyChecker
## and validates the deprecated |clear| attribute as one of
## left/all/right/none.
2681     $Element->{$HTML_NS}->{br} = {
2682 wakaba 1.40 %HTMLEmptyChecker,
2683 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
2684 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2685     clear => $GetHTMLEnumeratedAttrChecker->({
2686     left => 1, all => 1, right => 1, none => 1,
2687     }),
2688     }, {
2689 wakaba 1.49 %HTMLAttrStatus,
2690 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2691 wakaba 1.49 clear => FEATURE_M12N10_REC_DEPRECATED,
2692 wakaba 1.153 id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2693 wakaba 1.61 sdapref => FEATURE_HTML20_RFC,
2694 wakaba 1.153 style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2695     title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2696 wakaba 1.49 }),
2697 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
2698     ## (This requirement is semantic so that we cannot check.)
2699 wakaba 1.1 };
2700    
## Checker definition for the HTML |dialog| element.
## Children must strictly alternate dt, dd, dt, dd, ...; this is tracked in
## $element_state->{phase}, which is either 'before dt' or 'before dd'.
## Significant text children are reported, and ending in 'before dd' (a
## trailing |dt| without its |dd|) is reported as a missing child.
2701     $Element->{$HTML_NS}->{dialog} = {
2702 wakaba 1.153 status => FEATURE_HTML5_WD,
2703 wakaba 1.40 %HTMLChecker,
2704     check_start => sub {
2705     my ($self, $item, $element_state) = @_;
2706     $element_state->{phase} = 'before dt';
2707 wakaba 1.79
2708     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2709     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2710 wakaba 1.40 },
2711     check_child_element => sub {
2712     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2713     $child_is_transparent, $element_state) = @_;
2714 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2715     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2716 wakaba 1.40 $self->{onerror}->(node => $child_el,
2717     type => 'element not allowed:minus',
2718 wakaba 1.104 level => $self->{level}->{must});
2719 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2720     #
2721     } elsif ($element_state->{phase} eq 'before dt') {
2722     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
2723     $element_state->{phase} = 'before dd';
2724     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
## A |dd| arrived where a |dt| was expected: report the missing |dt| and
## treat the pair as complete, so the next expected child is again |dt|.
2725     $self->{onerror}
2726 wakaba 1.104 ->(node => $child_el, type => 'ps element missing',
2727     text => 'dt',
2728     level => $self->{level}->{must});
2729 wakaba 1.40 $element_state->{phase} = 'before dt';
2730     } else {
2731 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2732     level => $self->{level}->{must});
2733 wakaba 1.40 }
2734     } elsif ($element_state->{phase} eq 'before dd') {
2735     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
2736     $element_state->{phase} = 'before dt';
2737     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
## A second |dt| in a row: report the missing |dd| and keep waiting for
## the |dd| that belongs to this new |dt|.
2738     $self->{onerror}
2739 wakaba 1.104 ->(node => $child_el, type => 'ps element missing',
2740     text => 'dd',
2741     level => $self->{level}->{must});
2742 wakaba 1.40 $element_state->{phase} = 'before dd';
2743     } else {
2744 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2745     level => $self->{level}->{must});
2746 wakaba 1.1 }
2747 wakaba 1.40 } else {
2748     die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
2749     }
2750     },
2751     check_child_text => sub {
2752     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2753     if ($has_significant) {
2754 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
2755     level => $self->{level}->{must});
2756 wakaba 1.1 }
2757 wakaba 1.40 },
2758     check_end => sub {
2759     my ($self, $item, $element_state) = @_;
2760     if ($element_state->{phase} eq 'before dd') {
2761     $self->{onerror}->(node => $item->{node},
2762 wakaba 1.104 type => 'child element missing',
2763     text => 'dd',
2764     level => $self->{level}->{must});
2765 wakaba 1.1 }
2766 wakaba 1.40
2767     $HTMLChecker{check_end}->(@_);
2768 wakaba 1.1 },
2769     };
2770    
## Checker definition for the HTML |pre| element.
## Spreads %HTMLPhrasingContentChecker and validates the deprecated |width|
## attribute as a non-negative integer. At the end of the element, if the
## |class| attribute contains "idl" or "idl-code" as a word, the element's
## text content is handed to |onsubdoc| as a text/x-webidl sub-document.
2771     $Element->{$HTML_NS}->{pre} = {
2772 wakaba 1.40 %HTMLPhrasingContentChecker,
2773 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2774 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2775     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2776     }, {
2777 wakaba 1.49 %HTMLAttrStatus,
2778 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2779 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2780 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2781 wakaba 1.49 width => FEATURE_M12N10_REC_DEPRECATED,
2782     }),
2783 wakaba 1.101 check_end => sub {
2784     my ($self, $item, $element_state) = @_;
2785    
2786     ## TODO: Flag to enable/disable IDL checking?
2787 wakaba 1.145 my $class = $item->{node}->get_attribute_ns (undef, 'class');
## |get_attribute_ns| yields undef when the element has no |class|
## attribute; guard the match so that case is skipped explicitly instead of
## pattern-matching an undefined value.
2788 wakaba 1.102 if (defined $class and $class =~ /\bidl(?>-code)?\b/) { ## TODO: use classList.has
2789     ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
2790     ## NOTE: pre.code > code.idl-code: WebIDL spec
2791     ## NOTE: pre.idl-code: DOM1 spec
2792     ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
2793     ## NOTE: pre.schema: ReSpec-generated specs
2794 wakaba 1.101 $self->{onsubdoc}->({s => $item->{node}->text_content,
2795     container_node => $item->{node},
2796     media_type => 'text/x-webidl',
2797     is_char_string => 1});
2798     }
2799    
2800 wakaba 1.110 $HTMLPhrasingContentChecker{check_end}->(@_);
2801 wakaba 1.101 },
2802 wakaba 1.1 };
2803    
## Checker definition for the HTML |ol| element (its hash is also spread
## into the |ul| definition).
## Only |li| children in the HTML namespace are accepted, apart from
## elements in the current "plus" set; significant text children are
## reported as 'character not allowed'.
2804     $Element->{$HTML_NS}->{ol} = {
2805 wakaba 1.40 %HTMLChecker,
2806 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2807 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2808 wakaba 1.68 compact => $GetHTMLBooleanAttrChecker->('compact'),
2809 wakaba 1.69 reversed => $GetHTMLBooleanAttrChecker->('reversed'),
2810 wakaba 1.1 start => $HTMLIntegerAttrChecker,
2811 wakaba 1.69 ## TODO: HTML4 |type|
2812 wakaba 1.49 }, {
2813     %HTMLAttrStatus,
2814 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2815 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2816 wakaba 1.49 compact => FEATURE_M12N10_REC_DEPRECATED,
2817 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2818     reversed => FEATURE_HTML5_WD,
2819 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2820 wakaba 1.153 #start => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
2821     start => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2822 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2823 wakaba 1.1 }),
2824 wakaba 1.40 check_child_element => sub {
2825     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2826     $child_is_transparent, $element_state) = @_;
2827 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2828     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2829 wakaba 1.40 $self->{onerror}->(node => $child_el,
2830     type => 'element not allowed:minus',
2831 wakaba 1.104 level => $self->{level}->{must});
2832 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2833     #
2834     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
2835     #
2836     } else {
2837 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2838     level => $self->{level}->{must});
2839 wakaba 1.1 }
2840 wakaba 1.40 },
2841     check_child_text => sub {
2842     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2843     if ($has_significant) {
2844 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
2845     level => $self->{level}->{must});
2846 wakaba 1.1 }
2847     },
2848     };
2849    
## Checker definition for the HTML |ul| element.
## Starts from a shallow copy of the |ol| definition (so the child element
## and text checks are shared) and then overrides |status| and
## |check_attrs|; unlike |ol| it has no |reversed| or |start| checker.
2850     $Element->{$HTML_NS}->{ul} = {
2851 wakaba 1.40 %{$Element->{$HTML_NS}->{ol}},
2852 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2853 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2854     compact => $GetHTMLBooleanAttrChecker->('compact'),
2855 wakaba 1.69 ## TODO: HTML4 |type|
2856     ## TODO: sdaform, align
2857 wakaba 1.68 }, {
2858 wakaba 1.49 %HTMLAttrStatus,
2859 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2860 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2861 wakaba 1.49 compact => FEATURE_M12N10_REC_DEPRECATED,
2862 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2863 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2864 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2865     }),
2866 wakaba 1.1 };
2867    
## Checker definition for the deprecated HTML |dir| element.
## Starts from a shallow copy of the |ul| definition and overrides |status|
## (deprecated) and |check_attrs|.
2868 wakaba 1.64 $Element->{$HTML_NS}->{dir} = {
2869     ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
2870     %{$Element->{$HTML_NS}->{ul}},
2871     status => FEATURE_M12N10_REC_DEPRECATED,
2872 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2873     compact => $GetHTMLBooleanAttrChecker->('compact'),
2874     }, {
2875 wakaba 1.64 %HTMLAttrStatus,
2876     %HTMLM12NCommonAttrStatus,
2877     align => FEATURE_HTML2X_RFC,
2878     compact => FEATURE_M12N10_REC_DEPRECATED,
2879 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2880 wakaba 1.64 sdaform => FEATURE_HTML20_RFC,
2881     sdapref => FEATURE_HTML20_RFC,
2882     }),
2883     };
2884    
## Checker definition for the HTML |li| element.
##  - |value|: reported as 'non-ol li value' unless the parent element is an
##    HTML |ol|; the value itself is always checked as an integer.
##  - Content model: phrasing content while $self->{flag}->{in_menu} is set,
##    flow content otherwise.
2885 wakaba 1.1 $Element->{$HTML_NS}->{li} = {
2886 wakaba 1.72 %HTMLFlowContentChecker,
2887 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2888 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2889 wakaba 1.69 ## TODO: HTML4 |type|
2890 wakaba 1.49 value => sub {
2891 wakaba 1.1 my ($self, $attr) = @_;
2892 wakaba 1.152
## $parent_is_ol stays undefined (false) when the |li| has no parent
## element at all.
2893     my $parent_is_ol;
2894 wakaba 1.1 my $parent = $attr->owner_element->manakai_parent_element;
2895     if (defined $parent) {
2896     my $parent_ns = $parent->namespace_uri;
2897     $parent_ns = '' unless defined $parent_ns;
2898     my $parent_ln = $parent->manakai_local_name;
2899 wakaba 1.152 $parent_is_ol = ($parent_ns eq $HTML_NS and $parent_ln eq 'ol');
2900     }
2901    
2902     unless ($parent_is_ol) {
2903     ## ISSUE: No "MUST" in the spec.
2904     $self->{onerror}->(node => $attr,
2905     type => 'non-ol li value',
2906     level => $self->{level}->{html5_fact});
2907 wakaba 1.1 }
2908 wakaba 1.152
2909 wakaba 1.1 $HTMLIntegerAttrChecker->($self, $attr);
2910 wakaba 1.131 },
2911 wakaba 1.49 }, {
2912     %HTMLAttrStatus,
2913 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2914 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2915 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2916 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2917 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2918 wakaba 1.154 #value => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
2919 wakaba 1.55 # FEATURE_M12N10_REC_DEPRECATED,
2920 wakaba 1.154 value => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
2921 wakaba 1.82 FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
2922 wakaba 1.1 }),
2923 wakaba 1.40 check_child_element => sub {
2924     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2925     $child_is_transparent, $element_state) = @_;
2926     if ($self->{flag}->{in_menu}) {
2927 wakaba 1.152 ## TODO: In <dir> element, then ...
2928 wakaba 1.40 $HTMLPhrasingContentChecker{check_child_element}->(@_);
2929     } else {
2930 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
2931 wakaba 1.40 }
2932     },
2933     check_child_text => sub {
2934     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2935     if ($self->{flag}->{in_menu}) {
2936 wakaba 1.152 ## TODO: In <dir> element, then ...
2937 wakaba 1.40 $HTMLPhrasingContentChecker{check_child_text}->(@_);
2938 wakaba 1.1 } else {
2939 wakaba 1.72 $HTMLFlowContentChecker{check_child_text}->(@_);
2940 wakaba 1.1 }
2941     },
2942     };
2943    
## Checker definition for the HTML |dl| element.
## Children must form groups of one or more |dt| followed by one or more
## |dd|. $element_state->{phase} tracks the position:
##   'before dt' - nothing seen yet (initial state),
##   'in dts'    - inside a run of |dt| elements,
##   'in dds'    - inside a run of |dd| elements.
## Significant text children are reported, and ending in 'in dts' (a |dt|
## run without any |dd|) is reported as a missing child.
2944     $Element->{$HTML_NS}->{dl} = {
2945 wakaba 1.40 %HTMLChecker,
2946 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2947 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2948     compact => $GetHTMLBooleanAttrChecker->('compact'),
2949     }, {
2950 wakaba 1.49 %HTMLAttrStatus,
2951 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2952 wakaba 1.49 compact => FEATURE_M12N10_REC_DEPRECATED,
2953 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2954 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2955     sdapref => FEATURE_HTML20_RFC,
2956 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2957     }),
2958 wakaba 1.40 check_start => sub {
2959     my ($self, $item, $element_state) = @_;
2960     $element_state->{phase} = 'before dt';
2961 wakaba 1.79
2962     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2963     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2964 wakaba 1.40 },
2965     check_child_element => sub {
2966     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2967     $child_is_transparent, $element_state) = @_;
2968 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2969     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2970 wakaba 1.40 $self->{onerror}->(node => $child_el,
2971     type => 'element not allowed:minus',
2972 wakaba 1.104 level => $self->{level}->{must});
2973 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2974     #
2975     } elsif ($element_state->{phase} eq 'in dds') {
2976     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
2977     #$element_state->{phase} = 'in dds';
2978     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
2979     $element_state->{phase} = 'in dts';
2980     } else {
2981 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2982     level => $self->{level}->{must});
2983 wakaba 1.40 }
2984     } elsif ($element_state->{phase} eq 'in dts') {
2985     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
2986     #$element_state->{phase} = 'in dts';
2987     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
2988     $element_state->{phase} = 'in dds';
2989     } else {
2990 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2991     level => $self->{level}->{must});
2992 wakaba 1.40 }
2993     } elsif ($element_state->{phase} eq 'before dt') {
2994     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
2995     $element_state->{phase} = 'in dts';
2996     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
## The list starts with a |dd|: report the missing |dt| once and carry on
## as if inside a |dd| run.
2997     $self->{onerror}
2998 wakaba 1.104 ->(node => $child_el, type => 'ps element missing',
2999     text => 'dt',
3000     level => $self->{level}->{must});
3001 wakaba 1.40 $element_state->{phase} = 'in dds';
3002     } else {
3003 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
3004     level => $self->{level}->{must});
3005 wakaba 1.1 }
3006 wakaba 1.40 } else {
3007     die "check_child_element: Bad |dl| phase: $element_state->{phase}";
3008 wakaba 1.1 }
3009 wakaba 1.40 },
3010     check_child_text => sub {
3011     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3012     if ($has_significant) {
3013 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
3014     level => $self->{level}->{must});
3015 wakaba 1.40 }
3016     },
3017     check_end => sub {
3018     my ($self, $item, $element_state) = @_;
3019     if ($element_state->{phase} eq 'in dts') {
3020     $self->{onerror}->(node => $item->{node},
3021 wakaba 1.104 type => 'child element missing',
3022     text => 'dd',
3023     level => $self->{level}->{must});
3024 wakaba 1.1 }
3025    
3026 wakaba 1.40 $HTMLChecker{check_end}->(@_);
3027 wakaba 1.1 },
3028     };
3029    
## Checker definition for the HTML |dt| element.  Its callbacks are copied
## from %HTMLPhrasingContentChecker; |status| and |check_attrs| are listed
## after the spread, so they replace any same-named entries it carries.
3030     $Element->{$HTML_NS}->{dt} = {
3031 wakaba 1.40 %HTMLPhrasingContentChecker,
3032 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
         ## First argument (presumably element-specific attribute checkers)
         ## is empty; the second maps attribute names to feature status
         ## flags.  |lang| and |sdaform| follow the two spreads and
         ## therefore override any entries the spreads have for them.
3033 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3034     %HTMLAttrStatus,
3035 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3036 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3037 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3038 wakaba 1.49 }),
3039 wakaba 1.1 };
3040    
## Checker definition for the HTML |dd| element.  Unlike |dt|, its callbacks
## are copied from %HTMLFlowContentChecker; |status| and |check_attrs| are
## listed after the spread and replace any same-named entries it carries.
3041     $Element->{$HTML_NS}->{dd} = {
3042 wakaba 1.72 %HTMLFlowContentChecker,
3043 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
         ## No element-specific attribute checkers are passed (empty first
         ## argument); the second argument is the attribute status table.
3044 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3045     %HTMLAttrStatus,
3046 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3047 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3048 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3049 wakaba 1.49 }),
3050 wakaba 1.1 };
3051    
## Checker definition for the HTML |a| element.
##
## The base callbacks come from %HTMLTransparentChecker.  |check_attrs| is
## written out by hand (instead of using $GetHTMLAttrsChecker) because it
## must remember which hyperlink-related attributes are present: |target|,
## |ping|, |rel|, |media|, |hreflang| and |type| are reported as not allowed
## when |href| is absent.  |check_start| / |check_end| add and remove the
## interactive-content exclusion for descendants and restore the
## |in_a_href| flag.
3052     $Element->{$HTML_NS}->{a} = {
3053 wakaba 1.123 %HTMLTransparentChecker,
3054 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3055 wakaba 1.40 check_attrs => sub {
3056     my ($self, $item, $element_state) = @_;
         ## %attr: null-namespace attributes (keyed by local name) that have
         ## an entry in the element-specific checker table below.  It is
         ## consulted after the loop for the |href| dependency check.
3057 wakaba 1.1 my %attr;
3058 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
3059 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
3060     $attr_ns = '' unless defined $attr_ns;
3061     my $attr_ln = $attr->manakai_local_name;
3062     my $checker;
3063 wakaba 1.73 my $status;
3064 wakaba 1.1 if ($attr_ns eq '') {
         ## Status flags for this attribute name; explicit keys follow the
         ## two spreads and therefore take precedence over them.
3065 wakaba 1.73 $status = {
3066     %HTMLAttrStatus,
3067 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3068 wakaba 1.73 accesskey => FEATURE_M12N10_REC,
3069     charset => FEATURE_M12N10_REC,
3070 wakaba 1.82 coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3071 wakaba 1.73 cryptopts => FEATURE_RFC2659,
3072     dn => FEATURE_RFC2659,
3073 wakaba 1.154 href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
3074 wakaba 1.153 FEATURE_M12N10_REC,
3075     hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
3076     FEATURE_M12N10_REC,
3077     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3078     media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
3079 wakaba 1.73 methods => FEATURE_HTML20_RFC,
3080     name => FEATURE_M12N10_REC_DEPRECATED,
3081     nonce => FEATURE_RFC2659,
3082     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3083     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3084 wakaba 1.153 ping => FEATURE_HTML5_WD,
3085 wakaba 1.154 rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3086     rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3087 wakaba 1.73 sdapref => FEATURE_HTML20_RFC,
3088 wakaba 1.82 shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3089 wakaba 1.73 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3090 wakaba 1.153 target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3091     type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
3092 wakaba 1.73 urn => FEATURE_HTML20_RFC,
3093     }->{$attr_ln};
3094    
         ## Element-specific attribute value checkers.
3095 wakaba 1.1 $checker = {
3096 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
3097 wakaba 1.91 charset => sub {
3098     my ($self, $attr) = @_;
3099     $HTMLCharsetChecker->($attr->value, @_);
3100     },
3101 wakaba 1.70 ## TODO: HTML4 |coords|
3102 wakaba 1.1 target => $HTMLTargetAttrChecker,
3103     href => $HTMLURIAttrChecker,
3104     ping => $HTMLSpaceURIsAttrChecker,
3105 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
3106 wakaba 1.92 rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
3107 wakaba 1.70 ## TODO: HTML4 |shape|
3108 wakaba 1.1 media => $HTMLMQAttrChecker,
3109 wakaba 1.70 ## TODO: HTML4/XHTML1 |name|
3110 wakaba 1.1 hreflang => $HTMLLanguageTagAttrChecker,
3111     type => $HTMLIMTAttrChecker,
3112     }->{$attr_ln};
3113     if ($checker) {
3114     $attr{$attr_ln} = $attr;
3115 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
3116     $attr_ln !~ /[A-Z]/) {
         ## |data-*| attribute whose name contains no ASCII uppercase letter.
3117 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
3118     $status = $HTMLDatasetAttrStatus;
3119 wakaba 1.1 } else {
         ## Fall back to the shared HTML attribute checker table.
3120     $checker = $HTMLAttrChecker->{$attr_ln};
3121     }
3122     }
         ## Last resort for both checker and status: the global tables, first
         ## by exact (namespace, local name), then the namespace's '' entry.
3123     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
3124     || $AttrChecker->{$attr_ns}->{''};
3125 wakaba 1.82 $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
3126     || $AttrStatus->{$attr_ns}->{''};
         ## Namespaced attributes without any status default to "allowed".
3127     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
3128 wakaba 1.62
         ## A checker is only invoked when it is a reference; a true
         ## non-reference value is accepted without running anything.
3129 wakaba 1.1 if ($checker) {
3130 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
3131 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
         ## No checker and no status for a null-namespace attribute: no
         ## error is raised here (presumably |_attr_status_info| below
         ## reports it -- confirm).
3132 wakaba 1.54 #
3133 wakaba 1.1 } else {
3134 wakaba 1.104 $self->{onerror}->(node => $attr,
3135     type => 'unknown attribute',
3136     level => $self->{level}->{uncertain});
3137 wakaba 1.50 ## ISSUE: No conformance criteria for unknown attributes in the spec
3138 wakaba 1.1 }
3139 wakaba 1.49
3140 wakaba 1.82 $self->_attr_status_info ($attr, $status);
3141 wakaba 1.1 }
3142    
         ## Remember the inherited |in_a_href| flag so that check_end can
         ## restore it.
3143 wakaba 1.40 $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
3144 wakaba 1.4 if (defined $attr{href}) {
3145     $self->{has_hyperlink_element} = 1;
3146 wakaba 1.40 $self->{flag}->{in_a_href} = 1;
3147 wakaba 1.4 } else {
         ## Without |href|, these attributes are errors.
3148 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
3149     if (defined $attr{$_}) {
3150     $self->{onerror}->(node => $attr{$_},
3151 wakaba 1.104 type => 'attribute not allowed',
3152     level => $self->{level}->{must});
3153 wakaba 1.1 }
3154     }
3155     }
3156 wakaba 1.66
3157     $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
3158 wakaba 1.1 },
3159 wakaba 1.40 check_start => sub {
3160     my ($self, $item, $element_state) = @_;
         ## Register $HTMLInteractiveContent as excluded ("minus") elements
         ## for the duration of this element; undone in check_end.
3161     $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
3162 wakaba 1.79
3163     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3164     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3165 wakaba 1.40 },
3166     check_end => sub {
3167     my ($self, $item, $element_state) = @_;
3168     $self->_remove_minus_elements ($element_state);
         ## Clear |in_a_href| unless it was already set when this element
         ## started (value saved in check_attrs).
3169 wakaba 1.59 delete $self->{flag}->{in_a_href}
3170     unless $element_state->{in_a_href_original};
3171 wakaba 1.1
3172 wakaba 1.123 $HTMLTransparentChecker{check_end}->(@_);
3173 wakaba 1.1 },
3174     };
3175    
## Checker definition for the HTML |q| element: phrasing content with a
## |cite| attribute that is checked as a URI.
## NOTE(review): |status| is listed BEFORE the %HTMLPhrasingContentChecker
## spread here, so a |status| entry inside the spread (if it has one) would
## replace it -- confirm the spread carries none.
3176     $Element->{$HTML_NS}->{q} = {
3177 wakaba 1.153 status => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3178 wakaba 1.40 %HTMLPhrasingContentChecker,
         ## First argument: element-specific attribute checkers; second
         ## argument: attribute status table.
3179     check_attrs => $GetHTMLAttrsChecker->({
3180 wakaba 1.50 cite => $HTMLURIAttrChecker,
3181     }, {
3182 wakaba 1.49 %HTMLAttrStatus,
3183 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3184 wakaba 1.153 cite => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3185     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3186 wakaba 1.61 sdapref => FEATURE_HTML2X_RFC,
3187     sdasuff => FEATURE_HTML2X_RFC,
3188 wakaba 1.1 }),
3189 wakaba 1.66 check_start => sub {
3190     my ($self, $item, $element_state) = @_;
3191    
         ## Classify the URI-valued attributes of this element: |cite| as a
         ## citation, |template| and |ref| as resources.
3192     $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
3193 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3194     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3195 wakaba 1.66 },
3196 wakaba 1.1 };
3197 wakaba 1.75 ## TODO: "Quotation punctuation (such as quotation marks), if any, must be
3198     ## placed inside the <code>q</code> element." Though we cannot test the
3199     ## element against this requirement since it includes a semantic bit,
3200     ## it might be possible to inform of the existence of quotation marks OUTSIDE
3201     ## the |q| element.
3202 wakaba 1.1
## Checker definition for the HTML |cite| element.  Callbacks come from
## %HTMLPhrasingContentChecker; |status| and |check_attrs| follow the spread
## and replace any same-named entries it carries.
3203     $Element->{$HTML_NS}->{cite} = {
3204 wakaba 1.40 %HTMLPhrasingContentChecker,
3205 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
         ## No element-specific attribute checkers; status table only.
3206 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3207     %HTMLAttrStatus,
3208 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3209 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3210 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3211 wakaba 1.49 }),
3212 wakaba 1.1 };
3213    
## Checker definition for the HTML |em| element.  Callbacks come from
## %HTMLPhrasingContentChecker; |status| and |check_attrs| follow the spread
## and replace any same-named entries it carries.
3214     $Element->{$HTML_NS}->{em} = {
3215 wakaba 1.40 %HTMLPhrasingContentChecker,
3216 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
         ## No element-specific attribute checkers; status table only.
3217 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3218     %HTMLAttrStatus,
3219 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3220 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3221 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3222 wakaba 1.49 }),
3223 wakaba 1.1 };
3224    
## Checker definition for the HTML |strong| element.  Callbacks come from
## %HTMLPhrasingContentChecker; |status| and |check_attrs| follow the spread
## and replace any same-named entries it carries.
3225     $Element->{$HTML_NS}->{strong} = {
3226 wakaba 1.40 %HTMLPhrasingContentChecker,
3227 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
         ## No element-specific attribute checkers; status table only.
3228 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3229     %HTMLAttrStatus,
3230 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3231 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3232 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3233 wakaba 1.49 }),
3234 wakaba 1.1 };
3235    
## Checker definition for the HTML |small| element.  Callbacks come from
## %HTMLPhrasingContentChecker.  The attribute status table uses
## %HTMLM12NCommonAttrStatus (not the XHTML2 variant used by e.g. |em|).
3236     $Element->{$HTML_NS}->{small} = {
3237 wakaba 1.40 %HTMLPhrasingContentChecker,
3238 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
3239 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3240     %HTMLAttrStatus,
3241     %HTMLM12NCommonAttrStatus,
3242 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3243 wakaba 1.49 }),
3244 wakaba 1.1 };
3245    
## Checker definition for the HTML |big| element.  Its |status| carries only
## FEATURE_M12N10_REC (no HTML5 flag); content is checked with the
## callbacks from %HTMLPhrasingContentChecker.
3246 wakaba 1.51 $Element->{$HTML_NS}->{big} = {
3247     %HTMLPhrasingContentChecker,
3248     status => FEATURE_M12N10_REC,
3249     check_attrs => $GetHTMLAttrsChecker->({}, {
3250     %HTMLAttrStatus,
3251     %HTMLM12NCommonAttrStatus,
3252 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3253 wakaba 1.51 }),
3254     };
3255    
## Checker definition for the HTML |mark| element.  No |check_attrs| is set
## here, so whatever %HTMLPhrasingContentChecker provides is used as is.
## NOTE(review): |status| precedes the spread, so a |status| entry inside
## %HTMLPhrasingContentChecker (if any) would replace it -- confirm.
3256 wakaba 1.38 $Element->{$HTML_NS}->{mark} = {
3257 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
3258 wakaba 1.40 %HTMLPhrasingContentChecker,
3259 wakaba 1.1 };
3260    
## Checker definition for the HTML |dfn| element.
##
## check_start forbids nested |dfn| elements (added to the "minus" set and
## removed again in check_end) and records the element under its defining
## term in $self->{term}, a hash from term string to an array reference of
## the |dfn| nodes that define it.
##
## The defining term is chosen as follows:
##   1. the |title| attribute of the |dfn|, if present;
##   2. otherwise, if the |dfn| has exactly one element child, that child
##      is an HTML |abbr| with a |title| attribute, and every text or CDATA
##      child consists only of inter-element whitespace, the |abbr|'s
##      |title|;
##   3. otherwise the |dfn|'s text content.
3261     $Element->{$HTML_NS}->{dfn} = {
3262 wakaba 1.40 %HTMLPhrasingContentChecker,
3263 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3264 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3265     %HTMLAttrStatus,
3266 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3267 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3268 wakaba 1.49 }),
3269 wakaba 1.40 check_start => sub {
3270     my ($self, $item, $element_state) = @_;
3271     $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});
3272 wakaba 1.1
3273 wakaba 1.40 my $node = $item->{node};
3274 wakaba 1.1 my $term = $node->get_attribute_ns (undef, 'title');
3275     unless (defined $term) {
         ## Count element children explicitly.  Testing |defined $term|
         ## alone would miss a second element when the first one did not
         ## supply a term (e.g. <dfn><i>x</i><abbr title="y">z</abbr></dfn>),
         ## and the |abbr| title would wrongly win over the text content.
         my $element_count = 0;
3276     for my $child (@{$node->child_nodes}) {
3277     if ($child->node_type == 1) { # ELEMENT_NODE
         if (++$element_count > 1) {
3279     undef $term;
3280     last;
3281     } elsif ($child->manakai_local_name eq 'abbr') {
3282     my $nsuri = $child->namespace_uri;
3283     if (defined $nsuri and $nsuri eq $HTML_NS) {
3284     my $attr = $child->get_attribute_node_ns (undef, 'title');
3285     if ($attr) {
3286     $term = $attr->value;
3287     }
3288     }
3289     }
3290     } elsif ($child->node_type == 3 or $child->node_type == 4) {
3291     ## TEXT_NODE or CDATA_SECTION_NODE
3292 wakaba 1.132 if ($child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/) { # Inter-element whitespace
3293 wakaba 1.1 next;
3294     }
         ## Significant text beside the child element: rule 2 does not
         ## apply, fall through to the text content.
3295     undef $term;
3296     last;
3297     }
3298     }
3299     unless (defined $term) {
3300     $term = $node->text_content;
3301     }
3302     }
         ## Register this node under its term; several |dfn| elements may
         ## share one term, so the value is a list of nodes.
3303     if ($self->{term}->{$term}) {
3304     push @{$self->{term}->{$term}}, $node;
3305     } else {
3306     $self->{term}->{$term} = [$node];
3307     }
3308 wakaba 1.77 ## ISSUE: The HTML5 definition for the defined term does not work with
3309     ## |ruby| unless |dfn| has |title|.
3310 wakaba 1.79
3311     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3312     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3313 wakaba 1.40 },
3314     check_end => sub {
3315     my ($self, $item, $element_state) = @_;
         ## Undo the |dfn| exclusion added in check_start, then run the
         ## inherited end-of-element check.
3316     $self->_remove_minus_elements ($element_state);
3317 wakaba 1.1
3318 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
3319 wakaba 1.1 },
3320     };
3321    
## Checker definition for the HTML |abbr| element.  Callbacks come from
## %HTMLPhrasingContentChecker; the attribute status table additionally
## lists |full| (FEATURE_XHTML2_ED only).
3322     $Element->{$HTML_NS}->{abbr} = {
3323 wakaba 1.40 %HTMLPhrasingContentChecker,
3324 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3325 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3326     %HTMLAttrStatus,
3327 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3328     full => FEATURE_XHTML2_ED,
3329 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3330 wakaba 1.49 }),
3331 wakaba 1.77 ## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
3332     ## number (plural vs singular) must match the grammatical number of the
3333     ## contents of the element." Though this can be checked by machine,
3334     ## it requires language-specific knowledge and dictionary, such that
3335     ## we don't support the check of the requirement.
3336     ## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
3337 wakaba 1.49 };
3338    
## Checker definition for the HTML |acronym| element.  Its |status| carries
## only FEATURE_M12N10_REC (no HTML5 flag); content is checked with the
## callbacks from %HTMLPhrasingContentChecker.
3339     $Element->{$HTML_NS}->{acronym} = {
3340     %HTMLPhrasingContentChecker,
3341     status => FEATURE_M12N10_REC,
3342     check_attrs => $GetHTMLAttrsChecker->({}, {
3343     %HTMLAttrStatus,
3344     %HTMLM12NCommonAttrStatus,
3345 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3346 wakaba 1.49 }),
3347 wakaba 1.1 };
3348    
## Checker definition for the HTML |time| element.
##
## The |datetime| attribute checker in check_attrs accepts anything
## (|sub { 1 }|); the real validation happens in check_end, which parses
## either the |datetime| attribute value or, when the attribute is absent,
## the element's text content.  Two shapes are accepted by the pattern:
##   - date and time, with an optional time zone (captures 1-8);
##   - time only (captures 1, 9 and 10).
## Anything else is reported as 'dateortime:syntax error'.  Matching strings
## are then checked for field widths and value ranges.
## NOTE(review): |status| precedes the %HTMLPhrasingContentChecker spread,
## so a |status| entry inside the spread (if any) would replace it.
3349     $Element->{$HTML_NS}->{time} = {
3350 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
3351 wakaba 1.40 %HTMLPhrasingContentChecker,
3352     check_attrs => $GetHTMLAttrsChecker->({
3353 wakaba 1.1 datetime => sub { 1 }, # checked in |checker|
3354 wakaba 1.49 }, {
3355     %HTMLAttrStatus,
3356     %HTMLM12NCommonAttrStatus,
3357 wakaba 1.72 datetime => FEATURE_HTML5_FD,
3358 wakaba 1.1 }),
3359 wakaba 1.168 ## TODO: Update definition
3360 wakaba 1.1 ## TODO: Write tests
3361 wakaba 1.40 check_end => sub {
3362     my ($self, $item, $element_state) = @_;
3363 wakaba 1.1
         ## Pick the string to validate ($input), the node errors are
         ## attached to ($input_node), and the whitespace pattern allowed
         ## around components ($reg_sp): a fixed five-character set for the
         ## attribute value, any \p{WhiteSpace} for text content.
3364 wakaba 1.40 my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
3365 wakaba 1.1 my $input;
3366     my $reg_sp;
3367     my $input_node;
3368     if ($attr) {
3369     $input = $attr->value;
3370 wakaba 1.132 $reg_sp = qr/[\x09\x0A\x0C\x0D\x20]*/;
3371 wakaba 1.1 $input_node = $attr;
3372     } else {
3373 wakaba 1.40 $input = $item->{node}->text_content;
3374 wakaba 1.112 $reg_sp = qr/\p{WhiteSpace}*/;
3375 wakaba 1.40 $input_node = $item->{node};
3376 wakaba 1.1
3377     ## ISSUE: What is the definition for "successfully extracts a date
3378     ## or time"? If the algorithm says the string is invalid but
3379     ## return some date or time, is it "successfully"?
3380     }
3381    
3382     my $hour;
3383     my $minute;
3384     my $second;
         ## The pattern is deliberately lenient about digit counts; field
         ## widths are checked afterwards so that a more specific error can
         ## be reported.
3385     if ($input =~ /
3386     \A
3387 wakaba 1.112 $reg_sp
3388 wakaba 1.1 ([0-9]+) # 1
3389     (?>
3390     -([0-9]+) # 2
3391 wakaba 1.112 -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
3392     $reg_sp
3393 wakaba 1.1 (?>
3394     T
3395 wakaba 1.112 $reg_sp
3396 wakaba 1.1 )?
3397     ([0-9]+) # 4
3398     :([0-9]+) # 5
3399     (?>
3400     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
3401     )?
3402 wakaba 1.112 $reg_sp
3403 wakaba 1.1 (?>
3404     Z
3405 wakaba 1.112 $reg_sp
3406 wakaba 1.1 |
3407     [+-]([0-9]+):([0-9]+) # 7, 8
3408 wakaba 1.112 $reg_sp
3409 wakaba 1.1 )?
3410     \z
3411     |
3412     :([0-9]+) # 9
3413     (?>
3414     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
3415     )?
3416 wakaba 1.112 $reg_sp
3417     \z
3418 wakaba 1.1 )
3419     /x) {
3420     if (defined $2) { ## YYYY-MM-DD T? hh:mm
         ## Field widths: 4-digit year, 2-digit month, day, hour, minute.
3421     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
3422     length $4 != 2 or length $5 != 2) {
3423     $self->{onerror}->(node => $input_node,
3424 wakaba 1.104 type => 'dateortime:syntax error',
3425     level => $self->{level}->{must});
3426 wakaba 1.1 }
3427    
         ## Day range uses a per-month maximum table indexed by month
         ## (February allows 29); the second check rejects 29 February in
         ## years that are not leap years.
3428     if (1 <= $2 and $2 <= 12) {
3429 wakaba 1.104 $self->{onerror}->(node => $input_node, type => 'datetime:bad day',
3430     level => $self->{level}->{must})
3431 wakaba 1.1 if $3 < 1 or
3432     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
3433 wakaba 1.104 $self->{onerror}->(node => $input_node, type => 'datetime:bad day',
3434     level => $self->{level}->{must})
3435 wakaba 1.1 if $2 == 2 and $3 == 29 and
3436     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
3437     } else {
3438     $self->{onerror}->(node => $input_node,
3439 wakaba 1.104 type => 'datetime:bad month',
3440     level => $self->{level}->{must});
3441 wakaba 1.1 }
3442    
3443     ($hour, $minute, $second) = ($4, $5, $6);
3444    
3445     if (defined $7) { ## [+-]hh:mm
3446     if (length $7 != 2 or length $8 != 2) {
3447     $self->{onerror}->(node => $input_node,
3448 wakaba 1.104 type => 'dateortime:syntax error',
3449     level => $self->{level}->{must});
3450 wakaba 1.1 }
3451    
3452     $self->{onerror}->(node => $input_node,
3453 wakaba 1.104 type => 'datetime:bad timezone hour',
3454     level => $self->{level}->{must})
3455 wakaba 1.1 if $7 > 23;
3456     $self->{onerror}->(node => $input_node,
3457 wakaba 1.104 type => 'datetime:bad timezone minute',
3458     level => $self->{level}->{must})
3459 wakaba 1.1 if $8 > 59;
3460     }
3461     } else { ## hh:mm
3462     if (length $1 != 2 or length $9 != 2) {
3463     $self->{onerror}->(node => $input_node,
3464 wakaba 1.104 type => qq'dateortime:syntax error',
3465     level => $self->{level}->{must});
3466 wakaba 1.1 }
3467    
3468     ($hour, $minute, $second) = ($1, $9, $10);
3469     }
3470    
         ## Range checks shared by both shapes.
3471 wakaba 1.104 $self->{onerror}->(node => $input_node, type => 'datetime:bad hour',
3472     level => $self->{level}->{must}) if $hour > 23;
3473     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute',
3474     level => $self->{level}->{must}) if $minute > 59;
3475 wakaba 1.1
3476     if (defined $second) { ## s
3477     ## NOTE: The integer part of the seconds does not have to be two digits long.
3478    
         ## A seconds field that starts with "." (no integer part) matches
         ## the pattern above but is reported as a syntax error.
3479     if (substr ($second, 0, 1) eq '.') {
3480     $self->{onerror}->(node => $input_node,
3481 wakaba 1.104 type => 'dateortime:syntax error',
3482     level => $self->{level}->{must});
3483 wakaba 1.1 }
3484    
3485 wakaba 1.104 $self->{onerror}->(node => $input_node, type => 'datetime:bad second',
3486     level => $self->{level}->{must}) if $second >= 60;
3487 wakaba 1.1 }
3488     } else {
3489     $self->{onerror}->(node => $input_node,
3490 wakaba 1.104 type => 'dateortime:syntax error',
3491     level => $self->{level}->{must});
3492 wakaba 1.1 }
3493    
3494 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
3495 wakaba 1.1 },
3496     };
3497    
## Checker definition for the HTML |meter| element.  Each of |value|, |min|,
## |low|, |high|, |max| and |optimum| gets a floating-point-number checker
## built with a callback that always returns true (presumably a range
## predicate, so no range restriction is applied -- confirm against
## $GetHTMLFloatingPointNumberAttrChecker).
## NOTE(review): |status| precedes the %HTMLPhrasingContentChecker spread,
## so a |status| entry inside the spread (if any) would replace it.
3498     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
3499 wakaba 1.77 ## TODO: value inequalities (HTML5 revision 1463)
3500 wakaba 1.113 ## TODO: content checking
3501     ## TODO: content or value must contain number (rev 2053)
3502 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
3503 wakaba 1.40 %HTMLPhrasingContentChecker,
3504     check_attrs => $GetHTMLAttrsChecker->({
3505 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3506     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3507     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3508     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3509     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3510     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
3511 wakaba 1.50 }, {
3512     %HTMLAttrStatus,
3513     high => FEATURE_HTML5_DEFAULT,
3514     low => FEATURE_HTML5_DEFAULT,
3515     max => FEATURE_HTML5_DEFAULT,
3516     min => FEATURE_HTML5_DEFAULT,
3517     optimum => FEATURE_HTML5_DEFAULT,
3518     value => FEATURE_HTML5_DEFAULT,
3519 wakaba 1.1 }),
3520     };
3521    
## Checker definition for the HTML |progress| element.  |value| and |max|
## are checked as floating-point numbers; the callbacks passed to the
## checker factory test |>= 0| for |value| and |> 0| for |max|.
## NOTE(review): |status| precedes the %HTMLPhrasingContentChecker spread,
## so a |status| entry inside the spread (if any) would replace it.
3522     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
3523 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
3524 wakaba 1.40 %HTMLPhrasingContentChecker,
3525     check_attrs => $GetHTMLAttrsChecker->({
3526 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
3527     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
3528 wakaba 1.50 }, {
3529     %HTMLAttrStatus,
3530     max => FEATURE_HTML5_DEFAULT,
3531     value => FEATURE_HTML5_DEFAULT,
3532 wakaba 1.1 }),
3533     };
3534    
## Checker definition for the HTML |code| element.  Callbacks come from
## %HTMLPhrasingContentChecker; |status| and |check_attrs| follow the spread
## and replace any same-named entries it carries.
3535     $Element->{$HTML_NS}->{code} = {
3536 wakaba 1.40 %HTMLPhrasingContentChecker,
3537 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
         ## No element-specific attribute checkers; status table only.
3538 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3539     %HTMLAttrStatus,
3540 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3541 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3542 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3543 wakaba 1.49 }),
3544 wakaba 1.1 };
3545    
## Checker definition for the HTML |var| element.  Callbacks come from
## %HTMLPhrasingContentChecker; |status| and |check_attrs| follow the spread
## and replace any same-named entries it carries.
3546     $Element->{$HTML_NS}->{var} = {
3547 wakaba 1.40 %HTMLPhrasingContentChecker,
3548 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
         ## No element-specific attribute checkers; status table only.
3549 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3550     %HTMLAttrStatus,
3551 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3552 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3553 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3554 wakaba 1.49 }),
3555 wakaba 1.1 };
3556    
## Checker definition for the HTML |samp| element.  Callbacks come from
## %HTMLPhrasingContentChecker; |status| and |check_attrs| follow the spread
## and replace any same-named entries it carries.
3557     $Element->{$HTML_NS}->{samp} = {
3558 wakaba 1.40 %HTMLPhrasingContentChecker,
3559 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
         ## No element-specific attribute checkers; status table only.
3560 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3561     %HTMLAttrStatus,
3562 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3563 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3564 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3565 wakaba 1.49 }),
3566 wakaba 1.1 };
3567    
## Checker definition for the HTML |kbd| element.  Callbacks come from
## %HTMLPhrasingContentChecker; |status| and |check_attrs| follow the spread
## and replace any same-named entries it carries.
3568     $Element->{$HTML_NS}->{kbd} = {
3569 wakaba 1.40 %HTMLPhrasingContentChecker,
3570 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
         ## No element-specific attribute checkers; status table only.
3571 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3572     %HTMLAttrStatus,
3573 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3574 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3575 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3576 wakaba 1.49 }),
3577 wakaba 1.1 };
3578    
## Checker definition for the HTML |sub| element, also used for |sup|.
## Callbacks come from %HTMLPhrasingContentChecker; |status| and
## |check_attrs| follow the spread and replace any same-named entries.
3579     $Element->{$HTML_NS}->{sub} = {
3580 wakaba 1.40 %HTMLPhrasingContentChecker,
3581 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3582 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3583     %HTMLAttrStatus,
3584 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3585 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3586 wakaba 1.61 sdapref => FEATURE_HTML2X_RFC,
3587 wakaba 1.49 }),
3588 wakaba 1.1 };
3589    
## |sup| shares the very same hash reference as |sub| (not a copy), so any
## later modification of one definition is visible through the other.
3590 wakaba 1.51 $Element->{$HTML_NS}->{sup} = $Element->{$HTML_NS}->{sub};
3591 wakaba 1.1
## Checker definition for the HTML |span| element.  Callbacks come from
## %HTMLPhrasingContentChecker.  Besides the common attributes, the status
## table lists |datafld|, |dataformatas| and |datasrc| with
## FEATURE_HTML4_REC_RESERVED.
3592     $Element->{$HTML_NS}->{span} = {
3593 wakaba 1.40 %HTMLPhrasingContentChecker,
3594 wakaba 1.82 status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
3595 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3596     %HTMLAttrStatus,
3597 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
3598 wakaba 1.49 datafld => FEATURE_HTML4_REC_RESERVED,
3599     dataformatas => FEATURE_HTML4_REC_RESERVED,
3600     datasrc => FEATURE_HTML4_REC_RESERVED,
3601 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3602 wakaba 1.61 sdaform => FEATURE_HTML2X_RFC,
3603 wakaba 1.49 }),
3604 wakaba 1.1 };
3605    
## Checker definition for the HTML |i| element, also used for |b|.
## Callbacks come from %HTMLPhrasingContentChecker; the attribute status
## table uses %HTMLM12NCommonAttrStatus.
3606     $Element->{$HTML_NS}->{i} = {
3607 wakaba 1.40 %HTMLPhrasingContentChecker,
3608 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3609     check_attrs => $GetHTMLAttrsChecker->({}, {
3610     %HTMLAttrStatus,
3611     %HTMLM12NCommonAttrStatus,
3612 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3613 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3614 wakaba 1.49 }),
3615 wakaba 1.1 };
3616    
## |b| shares the very same hash reference as |i| (not a copy).
3617 wakaba 1.51 $Element->{$HTML_NS}->{b} = $Element->{$HTML_NS}->{i};
3618    
## Checker definition for the HTML |tt| element.  Its |status| carries only
## FEATURE_M12N10_REC (no HTML5 flag); content is checked with the
## callbacks from %HTMLPhrasingContentChecker.
3619 wakaba 1.61 $Element->{$HTML_NS}->{tt} = {
3620     %HTMLPhrasingContentChecker,
3621     status => FEATURE_M12N10_REC,
3622     check_attrs => $GetHTMLAttrsChecker->({}, {
3623     %HTMLAttrStatus,
3624     %HTMLM12NCommonAttrStatus,
3625 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3626 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
3627     }),
3628     };
3629 wakaba 1.51
## Checker definition for the HTML |s| element, also used for |strike| and
## |u|.  Its |status| is FEATURE_M12N10_REC_DEPRECATED; content is checked
## with the callbacks from %HTMLPhrasingContentChecker.
3630     $Element->{$HTML_NS}->{s} = {
3631 wakaba 1.40 %HTMLPhrasingContentChecker,
3632 wakaba 1.51 status => FEATURE_M12N10_REC_DEPRECATED,
3633 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
3634     %HTMLAttrStatus,
3635     %HTMLM12NCommonAttrStatus,
3636 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3637 wakaba 1.49 }),
3638 wakaba 1.1 };
3639    
## |strike| and |u| share the very same hash reference as |s| (not copies).
3640 wakaba 1.51 $Element->{$HTML_NS}->{strike} = $Element->{$HTML_NS}->{s};
3641    
3642     $Element->{$HTML_NS}->{u} = $Element->{$HTML_NS}->{s};
3643    
## Checker definition for the HTML |bdo| element.  Callbacks come from
## %HTMLPhrasingContentChecker.  |check_attrs| runs the generic attribute
## checker and then reports 'attribute missing' (must level) when the
## element has no |dir| attribute.
3644 wakaba 1.1 $Element->{$HTML_NS}->{bdo} = {
3645 wakaba 1.40 %HTMLPhrasingContentChecker,
3646 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
3647 wakaba 1.40 check_attrs => sub {
3648     my ($self, $item, $element_state) = @_;
         ## NOTE(review): the generic checker is rebuilt by
         ## $GetHTMLAttrsChecker on every call of this callback; other
         ## elements in this file build it once at definition time.
3649 wakaba 1.49 $GetHTMLAttrsChecker->({}, {
3650     %HTMLAttrStatus,
3651 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
3652     dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
3653     id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
3654     style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3655     title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
3656     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
3657 wakaba 1.61 sdapref => FEATURE_HTML2X_RFC,
3658     sdasuff => FEATURE_HTML2X_RFC,
3659 wakaba 1.49 })->($self, $item, $element_state);
         ## |dir| is mandatory on |bdo|.
3660 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'dir')) {
3661     $self->{onerror}->(node => $item->{node},
3662 wakaba 1.104 type => 'attribute missing',
3663     text => 'dir',
3664     level => $self->{level}->{must});
3665 wakaba 1.1 }
3666     },
3667     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
3668     };
3669    
3670 wakaba 1.99 $Element->{$HTML_NS}->{ruby} = {
3671     %HTMLPhrasingContentChecker,
3672     status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
3673     check_attrs => $GetHTMLAttrsChecker->({}, {
3674     %HTMLAttrStatus,
3675     %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
3676 wakaba 1.153 lang => FEATURE_HTML5_WD,
3677 wakaba 1.99 }),
3678     check_start => sub {
3679     my ($self, $item, $element_state) = @_;
3680    
3681     $element_state->{phase} = 'before-rb';
3682     #$element_state->{has_sig}
3683 wakaba 1.100
3684     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3685     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3686 wakaba 1.99 },
3687     ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
3688     check_child_element => sub {
3689     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3690     $child_is_transparent, $element_state) = @_;
3691 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
3692     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
3693 wakaba 1.99 $self->{onerror}->(node => $child_el,
3694     type => 'element not allowed:minus',
3695 wakaba 1.104 level => $self->{level}->{must});
3696 wakaba 1.99 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3697     #
3698     } elsif ($element_state->{phase} eq 'before-rb') {
3699     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3700     $element_state->{phase} = 'in-rb';
3701     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3702     $self->{onerror}->(node => $child_el,
3703 wakaba 1.104 level => $self->{level}->{should},
3704     type => 'no significant content before');
3705 wakaba 1.99 $element_state->{phase} = 'after-rt';
3706     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3707     $self->{onerror}->(node => $child_el,
3708 wakaba 1.104 level => $self->{level}->{should},
3709     type => 'no significant content before');
3710 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3711     } else {
3712     $self->{onerror}->(node => $child_el,
3713 wakaba 1.104 type => 'element not allowed:ruby base',
3714     level => $self->{level}->{must});
3715 wakaba 1.99 $element_state->{phase} = 'in-rb';
3716     }
3717     } elsif ($element_state->{phase} eq 'in-rb') {
3718     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3719     #$element_state->{phase} = 'in-rb';
3720     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3721     unless ($element_state->{has_significant}) {
3722     $self->{onerror}->(node => $child_el,
3723 wakaba 1.104 level => $self->{level}->{should},
3724     type => 'no significant content before');
3725 wakaba 1.99 }
3726     $element_state->{phase} = 'after-rt';
3727     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3728     unless ($element_state->{has_significant}) {
3729     $self->{onerror}->(node => $child_el,
3730 wakaba 1.104 level => $self->{level}->{should},
3731     type => 'no significant content before');
3732 wakaba 1.99 }
3733     $element_state->{phase} = 'after-rp1';
3734     } else {
3735     $self->{onerror}->(node => $child_el,
3736 wakaba 1.104 type => 'element not allowed:ruby base',
3737     level => $self->{level}->{must});
3738 wakaba 1.99 #$element_state->{phase} = 'in-rb';
3739     }
3740     } elsif ($element_state->{phase} eq 'after-rt') {
3741     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3742     if ($element_state->{has_significant}) {
3743     $element_state->{has_sig} = 1;
3744     delete $element_state->{has_significant};
3745     }
3746     $element_state->{phase} = 'in-rb';
3747     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3748     $self->{onerror}->(node => $child_el,
3749 wakaba 1.104 level => $self->{level}->{should},
3750     type => 'no significant content before');
3751 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3752     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3753     $self->{onerror}->(node => $child_el,
3754 wakaba 1.104 level => $self->{level}->{should},
3755     type => 'no significant content before');
3756 wakaba 1.99 #$element_state->{phase} = 'after-rt';
3757     } else {
3758     $self->{onerror}->(node => $child_el,
3759 wakaba 1.104 type => 'element not allowed:ruby base',
3760     level => $self->{level}->{must});
3761 wakaba 1.99 if ($element_state->{has_significant}) {
3762     $element_state->{has_sig} = 1;
3763     delete $element_state->{has_significant};
3764     }
3765     $element_state->{phase} = 'in-rb';
3766     }
3767     } elsif ($element_state->{phase} eq 'after-rp1') {
3768     if ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3769     $element_state->{phase} = 'after-rp-rt';
3770     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3771     $self->{onerror}->(node => $child_el,
3772 wakaba 1.104 type => 'ps element missing',
3773     text => 'rt',
3774     level => $self->{level}->{must});
3775 wakaba 1.99 $element_state->{phase} = 'after-rp2';
3776     } else {
3777     $self->{onerror}->(node => $child_el,
3778 wakaba 1.104 type => 'ps element missing',
3779     text => 'rt',
3780     level => $self->{level}->{must});
3781 wakaba 1.99 $self->{onerror}->(node => $child_el,
3782 wakaba 1.104 type => 'ps element missing',
3783     text => 'rp',
3784     level => $self->{level}->{must});
3785 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3786     $self->{onerror}->(node => $child_el,
3787 wakaba 1.104 type => 'element not allowed:ruby base',
3788     level => $self->{level}->{must});
3789 wakaba 1.99 }
3790     if ($element_state->{has_significant}) {
3791     $element_state->{has_sig} = 1;
3792     delete $element_state->{has_significant};
3793     }
3794     $element_state->{phase} = 'in-rb';
3795     }
3796     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3797     if ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3798     $element_state->{phase} = 'after-rp2';
3799     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3800     $self->{onerror}->(node => $child_el,
3801 wakaba 1.104 type => 'ps element missing',
3802     text => 'rp',
3803     level => $self->{level}->{must});
3804 wakaba 1.99 $self->{onerror}->(node => $child_el,
3805 wakaba 1.104 level => $self->{level}->{should},
3806     type => 'no significant content before');
3807 wakaba 1.99 $element_state->{phase} = 'after-rt';
3808     } else {
3809     $self->{onerror}->(node => $child_el,
3810 wakaba 1.104 type => 'ps element missing',
3811     text => 'rp',
3812     level => $self->{level}->{must});
3813 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3814     $self->{onerror}->(node => $child_el,
3815 wakaba 1.104 type => 'element not allowed:ruby base',
3816     level => $self->{level}->{must});
3817 wakaba 1.99 }
3818     if ($element_state->{has_significant}) {
3819     $element_state->{has_sig} = 1;
3820     delete $element_state->{has_significant};
3821     }
3822     $element_state->{phase} = 'in-rb';
3823     }
3824     } elsif ($element_state->{phase} eq 'after-rp2') {
3825     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3826     if ($element_state->{has_significant}) {
3827     $element_state->{has_sig} = 1;
3828     delete $element_state->{has_significant};
3829     }
3830     $element_state->{phase} = 'in-rb';
3831     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3832     $self->{onerror}->(node => $child_el,
3833 wakaba 1.104 level => $self->{level}->{should},
3834     type => 'no significant content before');
3835 wakaba 1.99 $element_state->{phase} = 'after-rt';
3836     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3837     $self->{onerror}->(node => $child_el,
3838 wakaba 1.104 level => $self->{level}->{should},
3839     type => 'no significant content before');
3840 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3841     } else {
3842     $self->{onerror}->(node => $child_el,
3843 wakaba 1.104 type => 'element not allowed:ruby base',
3844     level => $self->{level}->{must});
3845 wakaba 1.99 if ($element_state->{has_significant}) {
3846     $element_state->{has_sig} = 1;
3847     delete $element_state->{has_significant};
3848     }
3849     $element_state->{phase} = 'in-rb';
3850     }
3851     } else {
3852     die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
3853     }
3854     },
3855     check_child_text => sub {
3856     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3857     if ($has_significant) {
3858     if ($element_state->{phase} eq 'before-rb') {
3859     $element_state->{phase} = 'in-rb';
3860     } elsif ($element_state->{phase} eq 'in-rb') {
3861     #
3862     } elsif ($element_state->{phase} eq 'after-rt' or
3863     $element_state->{phase} eq 'after-rp2') {
3864     $element_state->{phase} = 'in-rb';
3865     } elsif ($element_state->{phase} eq 'after-rp1') {
3866     $self->{onerror}->(node => $child_node,
3867 wakaba 1.104 type => 'ps element missing',
3868     text => 'rt',
3869     level => $self->{level}->{must});
3870 wakaba 1.99 $self->{onerror}->(node => $child_node,
3871 wakaba 1.104 type => 'ps element missing',
3872     text => 'rp',
3873     level => $self->{level}->{must});
3874 wakaba 1.99 $element_state->{phase} = 'in-rb';
3875     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3876     $self->{onerror}->(node => $child_node,
3877 wakaba 1.104 type => 'ps element missing',
3878     text => 'rp',
3879     level => $self->{level}->{must});
3880 wakaba 1.99 $element_state->{phase} = 'in-rb';
3881     } else {
3882     die "check_child_text: Bad |ruby| phase: $element_state->{phase}";
3883     }
3884     }
3885     },
3886     check_end => sub {
3887     my ($self, $item, $element_state) = @_;
3888     $self->_remove_minus_elements ($element_state);
3889    
3890     if ($element_state->{phase} eq 'before-rb') {
3891     $self->{onerror}->(node => $item->{node},
3892 wakaba 1.104 level => $self->{level}->{should},
3893 wakaba 1.99 type => 'no significant content');
3894     $self->{onerror}->(node => $item->{node},
3895 wakaba 1.104 type => 'element missing',
3896     text => 'rt',
3897     level => $self->{level}->{must});
3898 wakaba 1.99 } elsif ($element_state->{phase} eq 'in-rb') {
3899     unless ($element_state->{has_significant}) {
3900     $self->{onerror}->(node => $item->{node},
3901 wakaba 1.104 level => $self->{level}->{should},
3902     type => 'no significant content at the end');
3903 wakaba 1.99 }
3904     $self->{onerror}->(node => $item->{node},
3905 wakaba 1.104 type => 'element missing',
3906     text => 'rt',
3907     level => $self->{level}->{must});
3908 wakaba 1.99 } elsif ($element_state->{phase} eq 'after-rt' or
3909     $element_state->{phase} eq 'after-rp2') {
3910     #
3911     } elsif ($element_state->{phase} eq 'after-rp1') {
3912     $self->{onerror}->(node => $item->{node},
3913 wakaba 1.104 type => 'element missing',
3914     text => 'rt',
3915     level => $self->{level}->{must});
3916 wakaba 1.99 $self->{onerror}->(node => $item->{node},
3917 wakaba 1.104 type => 'element missing',
3918     text => 'rp',
3919     level => $self->{level}->{must});
3920 wakaba 1.99 } elsif ($element_state->{phase} eq 'after-rp-rt') {
3921     $self->{onerror}->(node => $item->{node},
3922 wakaba 1.104 type => 'element missing',
3923     text => 'rp',
3924     level => $self->{level}->{must});
3925 wakaba 1.99 } else {
3926     die "check_child_text: Bad |ruby| phase: $element_state->{phase}";
3927     }
3928    
3929     ## NOTE: A modified version of |check_end| of %AnyChecker.
3930     if ($element_state->{has_significant} or $element_state->{has_sig}) {
3931     $item->{real_parent_state}->{has_significant} = 1;
3932     }
3933     },
3934     };
3935    
## |rt| element definition.  Child checking is inherited from
## %HTMLPhrasingContentChecker; no element-specific attribute
## checkers are registered, only attribute status information.
$Element->{$HTML_NS}->{rt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checkers.
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD,
    },
  ),
};
3945    
## |rp| element definition.  The significant text content is
## accumulated while the children are visited and validated at the end
## of the element: it has to be exactly one character of the expected
## punctuation class.
$Element->{$HTML_NS}->{rp} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, ## No element-specific attribute checkers.
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Accumulator filled by |check_child_text| and read by |check_end|.
    $element_state->{text} = '';

    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## NOTE: |<rp> <!---->(</rp>| is allowed, since only significant
    ## text is collected.
    $element_state->{text} .= $child_node->data if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Closing punctuation (Pe) is expected when the parent's phase is
    ## 'after-rp2'; in any other case opening punctuation (Ps) is
    ## expected.
    my $parent_state = $item->{parent_state};
    my $expects_close = ($parent_state and
                         $parent_state->{phase} and
                         $parent_state->{phase} eq 'after-rp2');
    my $p_class = $expects_close ? qr/\p{Pe}/ : qr/\p{Ps}/;

    ## A variant tolerating surrounding white space was once considered:
    ##   /\A[\x09\x0A\x0C\x0D\x20]*${p_class}[\x09\x0A\x0C\x0D\x20]*\z/
    unless ($element_state->{text} =~ /\A$p_class\z/) {
      $self->{onerror}->(node => $item->{node},
                         type => 'rp:syntax error',
                         level => $self->{level}->{must});
    }

    $HTMLTextChecker{check_end}->(@_);
  },
};
3987    
3988 wakaba 1.29 =pod
3989    
3990     ## TODO:
3991    
3992     +
3993     + <p>Partly because of the confusion described above, authors are
3994     + strongly recommended to always mark up all paragraphs with the
3995     + <code>p</code> element, and to not have any <code>ins</code> or
3996     + <code>del</code> elements that cross across any <span
3997     + title="paragraph">implied paragraphs</span>.</p>
3998     +
3999     (An informative note)
4000    
4001     <p><code>ins</code> elements should not cross <span
4002     + title="paragraph">implied paragraph</span> boundaries.</p>
4003     (normative)
4004    
4005     + <p><code>del</code> elements should not cross <span
4006     + title="paragraph">implied paragraph</span> boundaries.</p>
4007     (normative)
4008    
4009     =cut
4010    
## |ins| element definition.  Child checking is inherited from
## %HTMLTransparentChecker; |cite| and |datetime| have dedicated
## attribute checkers.
$Element->{$HTML_NS}->{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register how URI-valued attributes of this element are used.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{cite}->{type}->{cite} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
4032    
## |del| element definition.  Same attribute handling as |ins|, but
## with its own |check_end|: the significance flag of the content is
## not handed to the parent.
$Element->{$HTML_NS}->{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register how URI-valued attributes of this element are used.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{cite}->{type}->{cite} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate.  Nothing is
    ## reported when the element has significant content or when the
    ## item is marked as transparent.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
};
4066    
## |figure| element definition.
##
## State kept in |$element_state|:
##   has_legend_at_first - a |legend| was seen before any other content.
##   has_legend          - the |legend| element seen after other content.
##   has_non_legend      - non-|legend| content seen since the last
##                         |legend| (or since the start).
$Element->{$HTML_NS}->{figure} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  ## NOTE: legend, Flow | Flow, legend?
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Reports a misplaced |legend| at the given node.
    my $legend_not_allowed = sub {
      my $node = shift;
      $self->{onerror}->(node => $node,
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; does not affect the |legend| bookkeeping.
    } elsif ($child_ln eq 'legend' and $child_nsuri eq $HTML_NS) {
      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| exists already; this one is extra.
        $legend_not_allowed->($child_el);
      } elsif ($element_state->{has_legend}) {
        ## The earlier trailing |legend| turned out not to be the last
        ## one; report it and remember the new candidate.
        $legend_not_allowed->($element_state->{has_legend});
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        $element_state->{has_legend} = $child_el;
      } else {
        $element_state->{has_legend_at_first} = 1;
      }
      delete $element_state->{has_non_legend};
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      unless ($child_is_transparent) {
        $element_state->{has_non_legend} = 1;
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |legend| followed by more content is misplaced,
    ## unless a leading |legend| was already accepted.
    if (not $element_state->{has_legend_at_first} and
        $element_state->{has_legend} and
        $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
4126 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
4127 wakaba 1.1
## Attribute checker used for attributes whose value syntax is not
## checked yet: it always reports the attribute as 'unknown attribute'
## at the "uncertain" level.
my $AttrCheckerNotImplemented = sub {
  my $self = shift;
  my $attr = shift;
  $self->{onerror}->(
    node => $attr,
    type => 'unknown attribute',
    level => $self->{level}->{uncertain},
  );
};
4134    
## |img| element definition.  The attribute checker is built on every
## call of |check_attrs|; afterwards the presence of |alt| and |src|
## is verified and the URI-valued attributes are classified.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checkers = {
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      alt => sub { }, ## NOTE: No syntactical requirement
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        ## |ismap| is reported unless the |in_a_href| flag is set.
        unless ($self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{level}->{must});
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      longdesc => $HTMLURIAttrChecker,
      ## TODO: HTML4 |name|
      height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    };
    my $attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      src => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    };
    $GetHTMLAttrsChecker->($attr_checkers, $attr_status)
        ->($self, $item, $element_state);

    ## Missing |alt| is a "should"-level error, missing |src| a
    ## "must"-level one.
    ## TODO: more detailed handling of missing |alt|.
    for my $required (['alt', 'should'], ['src', 'must']) {
      my ($attr_name, $level_name) = @$required;
      next if $item->{node}->has_attribute_ns (undef, $attr_name);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $attr_name,
                         level => $self->{level}->{$level_name});
    }

    ## TODO: external resource check

    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{src}->{type}->{embedded} = 1;
    $uri_info->{lowsrc}->{type}->{embedded} = 1;
    $uri_info->{dynsrc}->{type}->{embedded} = 1;
    $uri_info->{longdesc}->{type}->{cite} = 1;
  },
};
4203    
## |iframe| element definition.  Child checking is inherited from
## %HTMLTextChecker.
##
## NOTE: The boolean attribute is spelled |seamless| in HTML5.  It was
## formerly registered here under the misspelling |seemless|, so the
## real attribute was not matched by this element's checker and status
## tables while the misspelled one was accepted.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    name => $HTMLBrowsingContextNameAttrChecker,
    sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
      'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
    }),
    seamless => $GetHTMLBooleanAttrChecker->('seamless'),
    src => $HTMLURIAttrChecker,
    width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sandbox => FEATURE_HTML5_WD,
    scrolling => FEATURE_M12N10_REC,
    seamless => FEATURE_HTML5_WD,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register how URI-valued attributes of this element are used.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
4245    
## |embed| element definition.  The element accepts arbitrary local
## attributes, so the attributes are walked by hand here instead of
## going through |$GetHTMLAttrsChecker|.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status table consulted by local name for every attribute.
    my %status_of = (
      %HTMLAttrStatus,
      height => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_WD,
      type => FEATURE_HTML5_WD,
      width => FEATURE_HTML5_LC,
    );

    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' if not defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      my $checker;
      my $status = $status_of{$attr_ln};

      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } elsif ($attr_ln eq 'width' or $attr_ln eq 'height') {
          ## TODO: because spec does not define them yet.
          $checker = $AttrCheckerNotImplemented;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } elsif ($attr_ln !~ /^[Xx][Mm][Ll]/ and
                 $attr_ln !~ /[A-Z]/ and
                 $attr_ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
          ## NOTE: Any local attribute is ok.
          $checker = $HTMLAttrChecker->{$attr_ln} || sub { };
          $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the per-namespace tables (specific entry first,
      ## then the '' wildcard entry).
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      if (not defined $status and length $attr_ns) {
        $status = FEATURE_ALLOWED;
      }

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($attr_ns eq '' and not $status) {
        ## Nothing is reported here for a null-namespace attribute
        ## without checker and status.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    unless ($has_src) {
      ## NOTE: <embed> without src="" is allowed since revision 1929.
      ## We issue an informational message since <embed> w/o src=""
      ## is likely an authoring error.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{info});
    }

    ## TODO: external resource check

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4323    
4324 wakaba 1.49 ## TODO:
4325     ## {applet} FEATURE_M12N10_REC_DEPRECATED
4326     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4327    
## |object| element definition.
##
## Content model: |param| elements first, then flow content.
## |$element_state->{has_non_param}| records that something other than
## a |param| has been seen, so that a later |param| can be reported.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      archive => $HTMLSpaceURIsAttrChecker,
          ## TODO: Relative to @codebase
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      classid => $HTMLURIAttrChecker,
      codebase => $HTMLURIAttrChecker,
      codetype => $HTMLIMTAttrChecker,
          ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
      data => $HTMLURIAttrChecker,
      declare => $GetHTMLBooleanAttrChecker->('declare'),
          ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
          ## [HTML4] but we don't know how to test this.
      form => $HTMLFormAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name => $HTMLBrowsingContextNameAttrChecker,
          ## NOTE: |name| attribute of the |object| element defines
          ## the name of the browsing context created by the element,
          ## if any, but is also used as the form control name of the
          ## form control provided by the plugin, if any.
      standby => sub {}, ## NOTE: %Text; in HTML4
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      'content-length' => FEATURE_XHTML2_ED,
      data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      form => FEATURE_HTML5_DEFAULT,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## At least one of |data| and |type| has to be specified.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type',
                           level => $self->{level}->{must});
      }
    }

    ## Register how URI-valued attributes of this element are used.
    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  ## NOTE: param*, transparent (Flow)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      ## A disallowed child is still non-|param| content.  (This used
      ## to set |has_non_legend|, a flag that belongs to |figure| and
      ## is never read for |object|, so a |param| following such a
      ## child was not reported as misplaced.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; does not affect the |param| bookkeeping.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| is only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Significant content is propagated to the real parent.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      ## No parent element and no significant content.
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
4445 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4446     ## What about |<section><object data><style scoped></style>x</object></section>|?
4447     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4448 wakaba 1.1
## |param| element definition.  Besides the per-attribute checks, both
## |name| and |value| are required.
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checkers = {
      name => sub { },
      type => $HTMLIMTAttrChecker,
      value => sub { },
      valuetype => $GetHTMLEnumeratedAttrChecker->({
        data => 1, ref => 1, object => 1,
      }),
    };
    my $attr_status = {
      %HTMLAttrStatus,
      href => FEATURE_RDFA_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    };
    ## The whole argument list is forwarded unchanged.
    $GetHTMLAttrsChecker->($attr_checkers, $attr_status)->(@_);

    ## Required attributes, reported in this order.
    for my $required_attr ('name', 'value') {
      unless ($item->{node}->has_attribute_ns (undef, $required_attr)) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => $required_attr,
                           level => $self->{level}->{must});
      }
    }
  },
};
4484    
## Checker for the |video| element.  |source| children are accepted
## only when the element has no |src| attribute and only before any
## other element or significant text; when there is no |src| attribute
## and no |source| child has been seen, an error is reported at the end.
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    poster => $HTMLURIAttrChecker,
    src => $HTMLURIAttrChecker,
    width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
  }, {
    %HTMLAttrStatus,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    height => FEATURE_HTML5_LC,
    poster => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    width => FEATURE_HTML5_LC,
    ## Attributes being considered for removal.
    end => FEATURE_HTML5_AT_RISK,
    loopend => FEATURE_HTML5_AT_RISK,
    loopstart => FEATURE_HTML5_AT_RISK,
    playcount => FEATURE_HTML5_AT_RISK,
    start => FEATURE_HTML5_AT_RISK,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |allow_source| is true while a |source| child may still appear.
    $element_state->{allow_source}
        = not $item->{node}->has_attribute_ns (undef, 'src');

    ## |has_source| becomes -1 ("required but not seen yet") when
    ## |source| children are allowed, unless it is already true.
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;
      ## NOTE: It might be set true by |check_element|.

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{$_}->{type}->{embedded} = 1 for qw/src poster/;
    $uri_info->{$_}->{type}->{resource} = 1 for qw/template ref/;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));
    if ($is_excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
      ## A |source| child in a position where it is no longer (or was
      ## never) allowed is reported, but still counts as "seen".
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:flow',
                         level => $self->{level}->{must})
          unless $element_state->{allow_source};
      $element_state->{has_source} = 1;
    } else {
      ## Any other child ends the run of |source| elements.
      delete $element_state->{allow_source};
      $HTMLFlowContentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text ends the run of |source| elements.
    delete $element_state->{allow_source} if $has_significant;
    $HTMLFlowContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'source',
                         level => $self->{level}->{must});
    }

    ## The remaining end-of-element checks are shared with |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
4566    
## Checker for the |audio| element.  It reuses every callback of the
## |video| checker and replaces only the status and the attribute
## checker (no |poster|, |height| or |width| entries).
$Element->{$HTML_NS}->{audio} = {
  %{ $Element->{$HTML_NS}->{video} },
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    ## Attributes being considered for removal.
    end => FEATURE_HTML5_AT_RISK,
    loopend => FEATURE_HTML5_AT_RISK,
    loopstart => FEATURE_HTML5_AT_RISK,
    playcount => FEATURE_HTML5_AT_RISK,
    start => FEATURE_HTML5_AT_RISK,
  }),
};
4589    
## Checker for the |source| element.  It builds on |%HTMLEmptyChecker|
## and requires the |src| attribute.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_known_attrs = $GetHTMLAttrsChecker->({
      media => $HTMLMQAttrChecker,
      ## ISSUE: Negative or zero pixelratio=""
      pixelratio => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
    }, {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_LC) } qw/media pixelratio src type/),
    });
    $check_known_attrs->(@_);

    my $el = $item->{node};
    if (not $el->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $el,
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{must});
    }

    ## The |src| URL is recorded as an embedded resource.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4617    
## Checker for the |canvas| element: transparent content, with |height|
## and |width| checked as non-negative integers (any such value is
## accepted by the range callback).
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_COMPLETE,
  check_attrs => $GetHTMLAttrsChecker->({
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_COMPLETE) } qw/height width/),
  }),
};
4630    
## Checker for the |map| element.  Besides the flow content checks it
## requires a non-empty |name| attribute (which must equal |id| when
## both are present) and collects, for the |area| checker, per-URL
## information used to report |area| elements whose |alt| is empty.
$Element->{$HTML_NS}->{map} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Set by the |name| checker to |[value, attribute node]|.
    my $name_info;

    $GetHTMLAttrsChecker->({
      name => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        ## NOTE: Duplication is not non-conforming.
        ## NOTE: Space characters are not non-conforming.
        unless (length $value) {
          $self->{onerror}->(node => $attr,
                             type => 'empty attribute value',
                             level => $self->{level}->{must});
        }
        ## The first |map| with a given name wins.
        $self->{map}->{$value} ||= $attr;
        $name_info = [$value, $attr];
      },
    }, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      ## Event handler attributes all share one status.
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
        onclick ondblclick onmousedown onmouseup onmouseover
        onmousemove onmouseout onkeypress onkeydown onkeyup
      /),
    })->(@_);

    my $el = $item->{node};
    if (not $name_info) {
      $self->{onerror}->(node => $el,
                         type => 'attribute missing',
                         text => 'name',
                         level => $self->{level}->{must});
    } else {
      my $id = $el->get_attribute_ns (undef, 'id');
      if (defined $id and $name_info->[0] ne $id) {
        $self->{onerror}
            ->(node => $el->get_attribute_node_ns (undef, 'id'),
               type => 'id ne name',
               level => $self->{level}->{must});
      }
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Remember the enclosing state so |check_end| can restore it, then
    ## install a copy of the stack with a fresh hash for this |map|.
    $element_state->{in_map_original} = $self->{flag}->{in_map};
    $self->{flag}->{in_map} = [@{$self->{flag}->{in_map} or []}, {}];
      ## NOTE: |{in_map}| is a reference to the array which contains
      ## hash references.  Hashes are corresponding to the opening
      ## |map| elements and each of them contains the key-value
      ## pairs corresponding to the absolute URLs for the processed
      ## |area| elements in the |map| element corresponding to the
      ## hash.  The key represents the resource (## TODO: use
      ## absolute URL), while the value represents whether there is
      ## an |area| element whose |alt| attribute is specified to a
      ## non-empty value.  If there IS such an |area| element for
      ## the resource specified by the key, then the value is set to
      ## zero (|0|).  Otherwise, if there is no such |area|
      ## element but there is any |area| element with the empty
      ## |alt=""| attribute, then the value contains an array
      ## reference that contains all of such |area| elements.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Every URL of this |map| that never got a non-empty |alt| still
    ## holds the list of nodes pushed by the |area| checker; report
    ## each of them.
    for my $url (keys %{$self->{flag}->{in_map}->[-1]}) {
      my $empty_alts = $self->{flag}->{in_map}->[-1]->{$url};
      next unless $empty_alts;
      for my $alt_node (@$empty_alts) {
        $self->{onerror}->(type => 'empty area alt',
                           node => $alt_node,
                           level => $self->{level}->{html5_no_may});
      }
    }

    $self->{flag}->{in_map} = $element_state->{in_map_original};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
4729    
## Checker for the |area| element.
##
## |check_attrs| walks every attribute itself (instead of delegating to
## |$GetHTMLAttrsChecker|) because the validity of |alt|, |coords| and
## the hyperlink attributes depends on which other attributes are
## present.  |check_start| reports an |area| that has a parent element
## but is not inside any |map|.
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Local name => attribute node, for the null-namespace attributes
    ## that have an entry in |$area_attr_checker| below.
    my %attr;

    ## Array reference of the comma-separated |coords| values; stays
    ## undefined when |coords| is absent or syntactically wrong.
    my $coords;

    ## The two tables below do not depend on the attribute being
    ## examined, so they are built once per element, not once per
    ## attribute.
    my $area_attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_WD,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_WD,
      nohref => FEATURE_M12N10_REC,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_WD,
      rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
      shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD,
    };
    my $area_attr_checker = {
      accesskey => $HTMLAccesskeyAttrChecker,
      alt => sub {
        ## NOTE: Checked later.
      },
      shape => $GetHTMLEnumeratedAttrChecker->({
        circ => -1, circle => 1,
        default => 1,
        poly => 1, polygon => -1,
        rect => 1, rectangle => -1,
      }),
      coords => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
          $coords = [split /,/, $value];
        } else {
          $self->{onerror}->(node => $attr,
                             type => 'coords:syntax error',
                             level => $self->{level}->{must});
        }
      },
      nohref => $GetHTMLBooleanAttrChecker->('nohref'),
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $status = $area_attr_status->{$attr_ln};
        $checker = $area_attr_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the namespace-wide tables.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## No checker and no status: nothing is reported here.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      if (defined $attr{alt}) {
        my $url = $attr{href}->value; ## TODO: resolve
        if (length $attr{alt}->value) {
          ## A non-empty |alt| settles this URL for every open |map|.
          for my $map_state (@{$self->{flag}->{in_map} or []}) {
            $map_state->{$url} = 0;
          }
        } else {
          ## NOTE: Empty |alt=""|.  If there is another |area| element
          ## with the same |href=""| and that |area| element's
          ## |alt=""| attribute is not an empty string, then this
          ## is conforming.
          for my $map_state (@{$self->{flag}->{in_map} or []}) {
            ## A false value means a non-empty |alt| was already seen.
            next if exists $map_state->{$url} and not $map_state->{$url};
            push @{$map_state->{$url} ||= []}, $attr{alt};
          }
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => 'alt',
                           level => $self->{level}->{must});
      }
    } else {
      ## Without |href| none of the hyperlink attributes, nor |alt|,
      ## is allowed.
      for my $name (qw/target ping rel media hreflang type alt/) {
        next unless defined $attr{$name};
        $self->{onerror}->(node => $attr{$name},
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## Normalize the |shape| keyword; anything unknown, and a missing
    ## attribute, is treated as a rectangle.
    my $shape = 'rectangle';
    if (defined $attr{shape}) {
      $shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      }->{lc $attr{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    if ($shape eq 'default') {
      if (defined $attr{coords}) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    } elsif (not defined $attr{coords}) {
      ## Every other shape requires |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'coords',
                         level => $self->{level}->{must});
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      if (@$coords != 3) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number not 3',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      } elsif ($coords->[2] < 0) {
        ## The third value must not be negative.
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:out of range',
                           index => 2,
                           value => $coords->[2],
                           level => $self->{level}->{must});
      }
    } elsif ($shape eq 'polygon') {
      if (@$coords < 6) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number lt 6',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      } elsif (@$coords % 2 != 0) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number not even',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      }
    } else { ## rectangle
      if (@$coords != 4) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number not 4',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      } else {
        ## Value 0 must be less than value 2, and value 1 less than
        ## value 3; the two conditions are reported independently.
        for my $index (0, 1) {
          next if $coords->[$index] < $coords->[$index + 2];
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range',
                             index => $index,
                             value => $coords->[$index],
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## An |area| with a parent element must be inside some |map|.
    unless ($self->{flag}->{in_map} or
            not $item->{node}->manakai_parent_element) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element not allowed:area',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
4978    
## Checker for the |table| element.
##
## |check_child_element| enforces the order of the children with a
## small state machine kept in |$element_state->{phase}|; |has_tfoot|
## records that a |tfoot| has been accepted so that a second one is
## rejected.  |check_end| additionally runs the table model algorithm
## from |Whatpm::HTMLTable| and stores the resulting table in
## |$self->{return}->{table}|.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cellpadding => $HTMLLengthAttrChecker,
    cellspacing => $HTMLLengthAttrChecker,
    frame => $GetHTMLEnumeratedAttrChecker->({
      void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
      lhs => 1, rhs => 1, box => 1, border => 1,
    }),
    rules => $GetHTMLEnumeratedAttrChecker->({
      none => 1, groups => 1, rows => 1, cols => 1, all => 1,
    }),
    summary => sub {}, ## NOTE: %Text; in HTML4.
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    cols => FEATURE_RFC1942,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';

    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_child_element => do {
    ## Transition table of the content model, built once:
    ##   current phase => { allowed HTML child => phase entered }.
    ## A child that has no entry for the current phase is an error and
    ## leaves the phase unchanged.
    ##
    ## A |tfoot| that comes before the body leads to the phase
    ## |after first tfoot|, from which either |tbody| elements or |tr|
    ## elements may follow.  (Entering |in tbodys| directly, as was
    ## done previously, wrongly rejected |tr| children after a leading
    ## |tfoot|.)
    my %next_phase = (
      'before caption' => {
        caption => 'in colgroup',
        colgroup => 'in colgroup',
        thead => 'after thead',
        tfoot => 'after first tfoot',
        tbody => 'in tbodys',
        tr => 'in trs',
      },
      'in colgroup' => {
        colgroup => 'in colgroup',
        thead => 'after thead',
        tfoot => 'after first tfoot',
        tbody => 'in tbodys',
        tr => 'in trs',
      },
      'after thead' => {
        tfoot => 'after first tfoot',
        tbody => 'in tbodys',
        tr => 'in trs',
      },
      'after first tfoot' => {
        tbody => 'in tbodys',
        tr => 'in trs',
      },
      'in tbodys' => {
        tbody => 'in tbodys',
        tfoot => 'after tfoot',
      },
      'in trs' => {
        tr => 'in trs',
        tfoot => 'after tfoot',
      },
      'after tfoot' => {},
    );

    sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
          $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{level}->{must});
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        ## Explicitly allowed here; the phase is not affected.
      } else {
        my $allowed = $next_phase{$element_state->{phase}}
            or die "check_child_element: Bad |table| phase: $element_state->{phase}";

        my $next = $child_nsuri eq $HTML_NS ? $allowed->{$child_ln} : undef;

        ## There can only be one |tfoot| child in total.
        undef $next if $child_ln eq 'tfoot' and $element_state->{has_tfoot};

        if (defined $next) {
          $element_state->{phase} = $next;
          $element_state->{has_tfoot} = 1 if $child_ln eq 'tfoot';
        } else {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed',
                             level => $self->{level}->{must});
        }
      }
    };
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text is not allowed directly in a |table|.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    my $table = Whatpm::HTMLTable->form_table ($item->{node}, sub {
      $self->{onerror}->(@_);
    }, $self->{level});
    Whatpm::HTMLTable->assign_header
        ($table, $self->{onerror}, $self->{level});
    push @{$self->{return}->{table}}, $table;

    $HTMLChecker{check_end}->(@_);
  },
};
5128    
## Checker for the |caption| element: flow content in which |table|
## elements are excluded for the duration of the element.
$Element->{$HTML_NS}->{caption} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      top => 1, bottom => 1, left => 1, right => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register |table| as an excluded element; undone in |check_end|.
    my $excluded = {$HTML_NS => {table => 1}};
    $self->_add_minus_elements ($element_state, $excluded);

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # caption
5155 wakaba 1.1
## Attribute checkers for the HTML4 cell alignment attributes, spliced
## into the attribute checker tables of the table-related elements.
my %cellalign = (
  ## HTML4 %cellhalign;
  align => $GetHTMLEnumeratedAttrChecker->({
    left => 1, center => 1, right => 1, justify => 1, char => 1,
  }),
  char => sub {
    my ($self, $attr) = @_;

    ## NOTE: "character" or |%Character;| in HTML4.

    ## The value has to be exactly one character long.
    my $char = $attr->value;
    unless (length ($char) == 1) {
      $self->{onerror}->(node => $attr, type => 'char:syntax error',
                         level => $self->{level}->{html4_fact});
    }
  },
  charoff => $HTMLLengthAttrChecker,

  ## HTML4 %cellvalign;
  valign => $GetHTMLEnumeratedAttrChecker->({
    top => 1, middle => 1, bottom => 1, baseline => 1,
  }),
);
5179    
## Checker for the |colgroup| element.  Only |col| children (and
## elements registered as "plus" elements) are accepted; significant
## text is an error.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    ## |span| has to be greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw/align char charoff valign width/),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));
    if ($is_excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
      ## The one regular child element.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
};
5226    
## |col| element definition.  Uses the empty-content checker unchanged;
## only the attribute checkers and status flags are specific to |col|.
$Element->{$HTML_NS}{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers.
    {
      %cellalign,
      ## The callback is true only for values greater than zero.
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    },
    ## Attribute status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align   => FEATURE_M12N10_REC,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      span    => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valign  => FEATURE_M12N10_REC,
      width   => FEATURE_M12N10_REC,
    },
  ),
};
5245    
## |tbody| element definition.  The child checkers below accept only
## HTML |tr| elements (plus anything in the current "plus" set) and
## report significant text as an error.
$Element->{$HTML_NS}{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers.
    {
      %cellalign,
    },
    ## Attribute status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align   => FEATURE_M12N10_REC,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      valign  => FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $el, $nsuri, $ln, $is_transparent, $state) = @_;

    ## An excluded ("minus") interactive element gets its own error type.
    if ($self->{minus_elements}{$nsuri}{$ln} and
        $IsInHTMLInteractiveContent->($el, $nsuri, $ln)) {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
      return;
    }

    ## Explicitly included ("plus") elements and |tr| are accepted as is.
    return if $self->{plus_elements}{$nsuri}{$ln};
    return if $nsuri eq $HTML_NS and $ln eq 'tr';

    ## Anything else is not part of the content model.
    $self->{onerror}->(node => $el, type => 'element not allowed',
                       level => $self->{level}{must});
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}{must});
  },
};
5285    
## |thead| is a shallow copy of the |tbody| definition.
$Element->{$HTML_NS}{thead} = { %{ $Element->{$HTML_NS}{tbody} } };
5289    
## |tfoot| is a shallow copy of the |tbody| definition.
$Element->{$HTML_NS}{tfoot} = { %{ $Element->{$HTML_NS}{tbody} } };
5293    
## |tr| element definition.  The child checkers below accept only HTML
## |td| and |th| elements (plus anything in the current "plus" set) and
## report significant text as an error.
$Element->{$HTML_NS}{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers.
    {
      %cellalign,
      bgcolor => $HTMLColorAttrChecker,
    },
    ## Attribute status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align   => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      valign  => FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $el, $nsuri, $ln, $is_transparent, $state) = @_;

    ## An excluded ("minus") interactive element gets its own error type.
    if ($self->{minus_elements}{$nsuri}{$ln} and
        $IsInHTMLInteractiveContent->($el, $nsuri, $ln)) {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
      return;
    }

    ## Explicitly included ("plus") elements and table cells are accepted.
    return if $self->{plus_elements}{$nsuri}{$ln};
    return if $nsuri eq $HTML_NS and ($ln eq 'td' or $ln eq 'th');

    ## Anything else is not part of the content model.
    $self->{onerror}->(node => $el, type => 'element not allowed',
                       level => $self->{level}{must});
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}{must});
  },
};
5336    
## |td| element definition.  Content is checked by the flow content
## checker; only attribute checkers and status flags are given here.
$Element->{$HTML_NS}{td} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers.
    {
      %cellalign,
      abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
      axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
      bgcolor => $HTMLColorAttrChecker,
      ## The |colspan| and |rowspan| callbacks are true only for values
      ## greater than zero.
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      headers => sub {
        ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
        ## Though that method does not check the |headers| attribute of a
        ## |td| element if the element does not form a table, in that case
        ## the |td| element is non-conforming anyway.
      },
      nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw(row col rowgroup colgroup)
      }),
    },
    ## Attribute status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      abbr    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      align   => FEATURE_M12N10_REC,
      axis    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      headers => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      height  => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      nowrap  => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      scope   => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valign  => FEATURE_M12N10_REC,
      width   => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
5376    
## |th| element definition.  Content is checked by the phrasing content
## checker; only attribute checkers and status flags are given here.
$Element->{$HTML_NS}{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers.
    {
      %cellalign,
      abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
      axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
      bgcolor => $HTMLColorAttrChecker,
      ## The |colspan| and |rowspan| callbacks are true only for values
      ## greater than zero.
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## TODO: HTML4(?) |headers|
      nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw(row col rowgroup colgroup)
      }),
    },
    ## Attribute status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      abbr    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      align   => FEATURE_M12N10_REC,
      axis    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char    => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      headers => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      height  => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      nowrap  => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      scope   => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valign  => FEATURE_M12N10_REC,
      width   => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
5411    
## |form| element definition.  Content is checked by the flow content
## checker, with |form| itself added to the excluded ("minus") elements
## between check_start and check_end.
$Element->{$HTML_NS}{form} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers.
    {
      accept => $AcceptAttrChecker,
      'accept-charset' => $HTMLCharsetsAttrChecker,
      action => $HTMLURIAttrChecker, ## TODO: Warn if submission is not defined for the scheme
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      enctype => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw(
          application/x-www-form-urlencoded
          multipart/form-data
          text/plain
        )
      }),
      method => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw(get post put delete)
      }),
      name => sub {
        my ($self, $attr) = @_;

        ## The name must be non-empty and must not repeat a name already
        ## recorded in |$self->{form}|.
        my $form_name = $attr->value;
        if ($form_name eq '') {
          $self->{onerror}->(type => 'empty form name',
                             node => $attr,
                             level => $self->{level}{must});
        } elsif ($self->{form}{$form_name}) {
          $self->{onerror}->(type => 'duplicate form name',
                             node => $attr,
                             value => $form_name,
                             level => $self->{level}{must});
        } else {
          $self->{form}{$form_name} = 1;
        }
      },
      novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
      ## TODO: Tests for following attrs:
      onformchange => $HTMLEventHandlerAttrChecker,
      onforminput => $HTMLEventHandlerAttrChecker,
      onreceived => $HTMLEventHandlerAttrChecker,
      replace => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw(document values)
      }),
      target => $HTMLTargetAttrChecker,
    },
    ## Attribute status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
      'accept-charset' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      data => FEATURE_WF2,
      enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      #name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      novalidate => FEATURE_HTML5_DEFAULT,
      onformchange => FEATURE_WF2_INFORMATIVE,
      onforminput => FEATURE_WF2_INFORMATIVE,
      onreceived => FEATURE_WF2,
      onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      replace => FEATURE_WF2,
      sdapref => FEATURE_HTML20_RFC,
      sdasuff => FEATURE_HTML20_RFC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Exclude nested |form| elements while inside this element.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {form => 1}});

    ## Record the URI type of each URI-valued attribute.
    $element_state->{uri_info}{action}{type}{action} = 1;
    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw(data template ref);
    $element_state->{id_type} = 'form';
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusion added in check_start, then run the ordinary
    ## flow content end check with the same arguments.
    $self->_remove_minus_elements ($element_state);
    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
5494    
## |fieldset| element definition.  The checkers below require a |legend|
## child and report a |legend| that appears after any other content.
$Element->{$HTML_NS}{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers.
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      form => $HTMLFormAttrChecker,
      name => $FormControlNameAttrChecker,
    },
    ## Attribute status flags.
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_DEFAULT,
    },
  ),
  ## NOTE: legend, Flow
  check_child_element => sub {
    my ($self, $item, $el, $nsuri, $ln, $is_transparent, $state) = @_;

    ## An excluded ("minus") interactive element is an error and counts
    ## as non-|legend| content.
    if ($self->{minus_elements}{$nsuri}{$ln} and
        $IsInHTMLInteractiveContent->($el, $nsuri, $ln)) {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
      $state->{has_non_legend} = 1;
      return;
    }

    ## Explicitly included ("plus") elements do not affect the state.
    return if $self->{plus_elements}{$nsuri}{$ln};

    if ($nsuri eq $HTML_NS and $ln eq 'legend') {
      ## A |legend| after other content (including an earlier |legend|)
      ## is reported.
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:details legend',
                         level => $self->{level}{must})
          if $state->{has_non_legend};
      $state->{has_legend} = 1;
      $state->{has_non_legend} = 1;
      return;
    }

    ## Everything else goes through the flow content checker with the
    ## original arguments.
    $HTMLFlowContentChecker{check_child_element}->(@_);
    $state->{has_non_legend} = 1 unless $is_transparent;
    ## TODO:
    ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
    ## should be an error, since |object| is allowed as flow,
    ## therefore |details| part of the content model does not match.
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    ## Significant text counts as non-|legend| content.
    $state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;

    ## Report the element if no |legend| child was seen.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'legend',
                       level => $self->{level}{must})
        unless $state->{has_legend};

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
  ## NOTE: This definition is partially reused by |details| element's
  ## checker.
};
5561    
5562     $Element->{$HTML_NS}->{input} = {
5563 wakaba 1.119 %HTMLEmptyChecker,
5564     status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5565 wakaba 1.140 check_attrs => sub {
5566     my ($self, $item, $element_state) = @_;
5567 wakaba 1.142
5568 wakaba 1.145 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5569 wakaba 1.142 $state = 'text' unless defined $state;
5570     $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5571    
5572 wakaba 1.140 for my $attr (@{$item->{node}->attributes}) {
5573     my $attr_ns = $attr->namespace_uri;
5574     $attr_ns = '' unless defined $attr_ns;
5575     my $attr_ln = $attr->manakai_local_name;
5576     my $checker;
5577     my $status;
5578     if ($attr_ns eq '') {
5579     $status =
5580     {
5581     %HTMLAttrStatus,
5582     %HTMLM12NCommonAttrStatus,
5583     accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5584     'accept-charset' => FEATURE_HTML2X_RFC,
5585     accesskey => FEATURE_M12N10_REC,
5586     action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5587     align => FEATURE_M12N10_REC_DEPRECATED,
5588     alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5589     autocomplete => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5590     autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5591     checked => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5592     datafld => FEATURE_HTML4_REC_RESERVED,
5593     dataformatas => FEATURE_HTML4_REC_RESERVED,
5594     datasrc => FEATURE_HTML4_REC_RESERVED,
5595     disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5596     enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5597     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5598 wakaba 1.150 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5599     FEATURE_XHTMLBASIC11_CR,
5600 wakaba 1.140 ismap => FEATURE_M12N10_REC,
5601 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5602 wakaba 1.140 list => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5603     max => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5604 wakaba 1.150 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X |
5605     FEATURE_M12N10_REC,
5606 wakaba 1.140 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5607     min => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5608 wakaba 1.156 multiple => FEATURE_HTML5_DEFAULT,
5609 wakaba 1.140 name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5610 wakaba 1.161 novalidate => FEATURE_HTML5_DEFAULT,
5611 wakaba 1.140 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5612     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5613     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5614     onformchange => FEATURE_WF2_INFORMATIVE,
5615     onforminput => FEATURE_WF2_INFORMATIVE,
5616     oninput => FEATURE_WF2,
5617     oninvalid => FEATURE_WF2,
5618     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5619     pattern => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5620 wakaba 1.156 placeholder => FEATURE_HTML5_DEFAULT,
5621 wakaba 1.140 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5622     replace => FEATURE_WF2,
5623     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5624     sdapref => FEATURE_HTML20_RFC,
5625 wakaba 1.154 size => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5626 wakaba 1.140 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5627     step => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5628     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5629     target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5630 wakaba 1.161 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO:dropped
5631 wakaba 1.140 type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5632     usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
5633     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5634     }->{$attr_ln};
5635    
5636     $checker =
5637     {
5638 wakaba 1.141 ## NOTE: Value of an empty string means that the attribute is only
5639     ## applicable for a specific set of states.
5640 wakaba 1.142 accept => '',
5641 wakaba 1.149 'accept-charset' => $HTMLCharsetsAttrChecker,
5642     ## NOTE: To which states it applies is not defined in RFC 2070.
5643 wakaba 1.150 accesskey => '', ## NOTE: Not applied to |hidden| [WF2].
5644 wakaba 1.142 action => '',
5645 wakaba 1.150 align => '',
5646 wakaba 1.141 alt => '',
5647 wakaba 1.142 autocomplete => '',
5648 wakaba 1.165 autofocus => $AutofocusAttrChecker,
5649     ## NOTE: <input type=hidden disabled> is not disallowed.
5650 wakaba 1.142 checked => '',
5651     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
5652 wakaba 1.165 ## NOTE: <input type=hidden disabled> is not disallowed.
5653 wakaba 1.142 enctype => '',
5654     form => $HTMLFormAttrChecker,
5655 wakaba 1.150 inputmode => '',
5656     ismap => '', ## NOTE: "MUST" be type=image [HTML4]
5657 wakaba 1.142 list => '',
5658     max => '',
5659     maxlength => '',
5660     method => '',
5661     min => '',
5662 wakaba 1.156 multiple => '',
5663 wakaba 1.165 name => $FormControlNameAttrChecker,
5664 wakaba 1.166 novalidate => '',
5665 wakaba 1.149 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
5666     onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
5667     oninput => $HTMLEventHandlerAttrChecker, # [WF2]
5668     oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
5669     ## TODO: tests for four attributes above
5670 wakaba 1.142 pattern => '',
5671 wakaba 1.156 placeholder => '',
5672 wakaba 1.142 readonly => '',
5673 wakaba 1.150 replace => '',
5674 wakaba 1.142 required => '',
5675     size => '',
5676     src => '',
5677     step => '',
5678     target => '',
5679 wakaba 1.140 type => $GetHTMLEnumeratedAttrChecker->({
5680 wakaba 1.156 hidden => 1, text => 1, search => 1, url => 1,
5681     email => 1, password => 1,
5682 wakaba 1.141 datetime => 1, date => 1, month => 1, week => 1, time => 1,
5683 wakaba 1.157 'datetime-local' => 1, number => 1, range => 1, color => 1,
5684     checkbox => 1,
5685 wakaba 1.141 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
5686     button => 1,
5687 wakaba 1.140 }),
5688 wakaba 1.151 usemap => '',
5689 wakaba 1.142 value => '',
5690 wakaba 1.140 }->{$attr_ln};
5691 wakaba 1.141
5692     ## State-dependent checkers
5693     unless ($checker) {
5694     if ($state eq 'hidden') {
5695     $checker =
5696     {
5697 wakaba 1.142 value => sub {
5698     my ($self, $attr, $item, $element_state) = @_;
5699 wakaba 1.145 my $name = $item->{node}->get_attribute_ns (undef, 'name');
5700 wakaba 1.142 if (defined $name and $name eq '_charset_') { ## case-sensitive
5701     $self->{onerror}->(node => $attr,
5702     type => '_charset_ value',
5703     level => $self->{level}->{must});
5704     }
5705     },
5706 wakaba 1.141 }->{$attr_ln} || $checker;
5707 wakaba 1.142 ## TODO: Warn if no name attribute?
5708     ## TODO: Warn if name!=_charset_ and no value attribute?
5709 wakaba 1.168 } elsif ({
5710     datetime => 1, date => 1, month => 1, time => 1,
5711     week => 1, 'datetime-local' => 1,
5712     }->{$state}) {
5713     my $v = {
5714     datetime => ['global_date_and_time_string'],
5715     date => ['date_string'],
5716     month => ['month_string'],
5717     week => ['week_string'],
5718     time => ['time_string'],
5719     'datetime-local' => ['local_date_and_time_string'],
5720     }->{$state};
5721 wakaba 1.144 $checker =
5722     {
5723 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5724 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5725     on => 1, off => 1,
5726     }),
5727 wakaba 1.158 list => $ListAttrChecker,
5728 wakaba 1.168 min => $GetDateTimeAttrChecker->($v->[0]),
5729     max => $GetDateTimeAttrChecker->($v->[0]),
5730 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5731 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5732 wakaba 1.148 step => $StepAttrChecker,
5733 wakaba 1.168 value => $GetDateTimeAttrChecker->($v->[0]),
5734 wakaba 1.144 }->{$attr_ln} || $checker;
5735     } elsif ($state eq 'number') {
5736     $checker =
5737     {
5738 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5739 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5740     on => 1, off => 1,
5741     }),
5742 wakaba 1.158 list => $ListAttrChecker,
5743 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5744     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5745 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5746 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5747 wakaba 1.148 step => $StepAttrChecker,
5748 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5749 wakaba 1.144 }->{$attr_ln} || $checker;
5750     } elsif ($state eq 'range') {
5751     $checker =
5752     {
5753 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5754 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5755     on => 1, off => 1,
5756     }),
5757 wakaba 1.158 list => $ListAttrChecker,
5758 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5759     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5760 wakaba 1.148 step => $StepAttrChecker,
5761 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5762 wakaba 1.144 }->{$attr_ln} || $checker;
5763 wakaba 1.157 } elsif ($state eq 'color') {
5764     $checker =
5765     {
5766     accesskey => $HTMLAccesskeyAttrChecker,
5767     autocomplete => $GetHTMLEnumeratedAttrChecker->({
5768     on => 1, off => 1,
5769     }),
5770 wakaba 1.158 list => $ListAttrChecker,
5771 wakaba 1.157 value => sub {
5772     my ($self, $attr) = @_;
5773     unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
5774     $self->{onerror}->(node => $attr,
5775     type => 'scolor:syntax error', ## TODOC: type
5776     level => $self->{level}->{must});
5777     }
5778     },
5779     }->{$attr_ln} || $checker;
5780 wakaba 1.144 } elsif ($state eq 'checkbox' or $state eq 'radio') {
5781     $checker =
5782     {
5783 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5784 wakaba 1.149 checked => $GetHTMLBooleanAttrChecker->('checked'),
5785     ## ISSUE: checked value not (yet?) defined.
5786     ## TODO: tests
5787 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5788 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5789     }->{$attr_ln} || $checker;
5790     ## TODO: There MUST be another input type=radio with same
5791     ## name (Radio state).
5792     ## ISSUE: There should be exactly one type=radio with checked?
5793     } elsif ($state eq 'file') {
5794     $checker =
5795     {
5796 wakaba 1.161 accept => $AcceptAttrChecker,
5797 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5798 wakaba 1.168 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
5799 wakaba 1.159 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
5800 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5801 wakaba 1.144 }->{$attr_ln} || $checker;
5802     } elsif ($state eq 'submit') {
5803     $checker =
5804     {
5805 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5806 wakaba 1.149 action => $HTMLURIAttrChecker,
5807 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5808     'application/x-www-form-urlencoded' => 1,
5809     'multipart/form-data' => 1,
5810     'text/plain' => 1,
5811     }),
5812 wakaba 1.149 method => $GetHTMLEnumeratedAttrChecker->({
5813     get => 1, post => 1, put => 1, delete => 1,
5814     }),
5815 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5816 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5817     document => 1, values => 1,
5818     }),
5819     target => $HTMLTargetAttrChecker,
5820 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5821     }->{$attr_ln} || $checker;
5822     } elsif ($state eq 'image') {
5823     $checker =
5824     {
5825 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5826 wakaba 1.149 action => $HTMLURIAttrChecker,
5827     align => $GetHTMLEnumeratedAttrChecker->({
5828     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
5829     }),
5830 wakaba 1.144 alt => sub {
5831     my ($self, $attr) = @_;
5832     my $value = $attr->value;
5833     unless (length $value) {
5834     $self->{onerror}->(node => $attr,
5835     type => 'empty anchor image alt',
5836     level => $self->{level}->{must});
5837     }
5838     },
5839 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5840     'application/x-www-form-urlencoded' => 1,
5841     'multipart/form-data' => 1,
5842     'text/plain' => 1,
5843     }),
5844 wakaba 1.149 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
5845     method => $GetHTMLEnumeratedAttrChecker->({
5846     get => 1, post => 1, put => 1, delete => 1,
5847     }),
5848 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5849 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5850     document => 1, values => 1,
5851     }),
5852 wakaba 1.144 src => $HTMLURIAttrChecker,
5853     ## TODO: There is requirements on the referenced resource.
5854 wakaba 1.149 target => $HTMLTargetAttrChecker,
5855     usemap => $HTMLUsemapAttrChecker,
5856 wakaba 1.144 }->{$attr_ln} || $checker;
5857     ## TODO: alt & src are required.
5858     } elsif ({
5859     reset => 1, button => 1,
5860     ## NOTE: From Web Forms 2.0:
5861     remove => 1, 'move-up' => 1, 'move-down' => 1,
5862     add => 1,
5863     }->{$state}) {
5864     $checker =
5865     {
5866 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5867 wakaba 1.144 ## NOTE: According to Web Forms 2.0, |input| attribute
5868     ## has |template| attribute to support the |add| button
5869     ## type (as part of the repetition template feature). It
5870     ## conflicts with the |template| global attribute
5871     ## introduced as part of the data template feature.
5872     ## NOTE: |template| attribute as defined in Web Forms 2.0
5873     ## has no author requirement.
5874     value => sub { }, ## NOTE: No restriction.
5875     }->{$attr_ln} || $checker;
5876 wakaba 1.156 } else { # Text, Search, E-mail, URL, Password
5877 wakaba 1.141 $checker =
5878     {
5879 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5880 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5881     on => 1, off => 1,
5882     }),
5883 wakaba 1.149 ## TODO: inputmode [WF2]
5884 wakaba 1.158 list => $ListAttrChecker,
5885 wakaba 1.147 maxlength => sub {
5886     my ($self, $attr, $item, $element_state) = @_;
5887    
5888     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
5889    
5890 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
5891 wakaba 1.147 ## NOTE: Applying the rules for parsing non-negative
5892     ## integers results in a number.
5893     my $max_allowed_value_length = 0+$1;
5894    
5895     my $value = $item->{node}->get_attribute_ns (undef, 'value');
5896     if (defined $value) {
5897     my $codepoint_length = length $value;
5898 wakaba 1.162
5899 wakaba 1.147 if ($codepoint_length > $max_allowed_value_length) {
5900     $self->{onerror}
5901     ->(node => $item->{node}
5902     ->get_attribute_node_ns (undef, 'value'),
5903     type => 'value too long',
5904     level => $self->{level}->{must});
5905     }
5906     }
5907     }
5908     },
5909 wakaba 1.160 pattern => $PatternAttrChecker,
5910 wakaba 1.159 placeholder => sub {
5911     my ($self, $attr) = @_;
5912     if ($attr->value =~ /[\x0D\x0A]/) {
5913     $self->{onerror}->(node => $attr,
5914     type => 'newline in value', ## TODOC: type
5915     level => $self->{level}->{must});
5916     }
5917     },
5918 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5919 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5920 wakaba 1.147 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
## Checker for the |value| attribute in the Text, Search, E-mail, URL,
## and Password states.  Which rule applies depends on the enclosing
## |$state|:
##   - |url|: delegated to |$HTMLURIAttrChecker|.
##   - |email|: the value (or, if the |multiple| attribute is present,
##     each comma-separated token) has to match |$ValidEmailAddress|.
##   - otherwise: the value must not contain U+000D or U+000A.
## Arguments: ($self, $attr, $item, $element_state), the usual attribute
## checker arguments; errors are reported through |$self->{onerror}|.
value => sub {
  my ($self, $attr, $item, $element_state) = @_;
  if ($state eq 'url') {
    $HTMLURIAttrChecker->(@_);
  } elsif ($state eq 'email') {
    if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
      ## NOTE: The limit of |-1| keeps trailing empty fields, such that
      ## |a@example.com,| yields an (invalid) empty second token.
      my @addr = split /,/, $attr->value, -1;
      ## NOTE: |split| returns an empty list for the empty string; it
      ## is handled as a single empty token.
      @addr = ('') unless @addr;
      for (@addr) {
        ## Strip any number of space characters around the token.
        ## (Formerly only a single trailing space character was
        ## stripped, while all leading ones were; a token followed by
        ## two or more space characters was reported as an error.)
        s/\A[\x09\x0A\x0C\x0D\x20]+//;
        s/[\x09\x0A\x0C\x0D\x20]+\z//;
        unless (/\A$ValidEmailAddress\z/) {
          $self->{onerror}->(node => $attr,
                             type => 'email:syntax error', ## TODO: type
                             value => $_,
                             level => $self->{level}->{must});
        }
      }
    } else {
      unless ($attr->value =~ /\A$ValidEmailAddress\z/) {
        $self->{onerror}->(node => $attr,
                           type => 'email:syntax error', ## TODO: type
                           level => $self->{level}->{must});
      }
    }
  } else {
    if ($attr->value =~ /[\x0D\x0A]/) {
      $self->{onerror}->(node => $attr,
                         type => 'newline in value', ## TODO: type
                         level => $self->{level}->{must});
    }
  }
},
5954 wakaba 1.141 }->{$attr_ln} || $checker;
5955 wakaba 1.147 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
5956 wakaba 1.156 $checker = $GetHTMLBooleanAttrChecker->('multiple')
5957     if $state eq 'email' and $attr_ln eq 'multiple';
5958 wakaba 1.161
5959     if ($item->{node}->has_attribute_ns (undef, 'pattern') and
5960     not $item->{node}->has_attribute_ns (undef, 'title')) {
5961     $self->{onerror}->(node => $item->{node},
5962     type => 'attribute missing',
5963     text => 'title',
5964     level => $self->{level}->{should});
5965     }
5966 wakaba 1.141 }
5967     }
5968    
5969     if (defined $checker) {
5970     if ($checker eq '') {
5971     $checker = sub {
5972     my ($self, $attr) = @_;
5973     $self->{onerror}->(node => $attr,
5974     type => 'input attr not applicable',
5975     text => $state,
5976     level => $self->{level}->{must});
5977     };
5978     }
5979 wakaba 1.140 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
5980     $attr_ln !~ /[A-Z]/) {
5981     $checker = $HTMLDatasetAttrChecker;
5982     $status = $HTMLDatasetAttrStatus;
5983     } else {
5984     $checker = $HTMLAttrChecker->{$attr_ln};
5985     }
5986     }
5987     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
5988     || $AttrChecker->{$attr_ns}->{''};
5989     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
5990     || $AttrStatus->{$attr_ns}->{''};
5991     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
5992 wakaba 1.157
5993     ## TODOC: accesskey="" is also applied to type=search and type=color
5994 wakaba 1.140
5995     if ($checker) {
5996     $checker->($self, $attr, $item, $element_state) if ref $checker;
5997     } elsif ($attr_ns eq '' and not $status) {
5998     #
5999     } else {
6000     $self->{onerror}->(node => $attr,
6001     type => 'unknown attribute',
6002     level => $self->{level}->{uncertain});
6003     ## ISSUE: No comformance createria for unknown attributes in the spec
6004     }
6005    
6006     $self->_attr_status_info ($attr, $status);
6007     }
6008 wakaba 1.168
6009     ## ISSUE: -0/+0
6010    
6011     if ($state eq 'range') {
6012     $element_state->{number_value}->{min} ||= 0;
6013     $element_state->{number_value}->{max} = 100
6014     unless defined $element_state->{number_value}->{max};
6015     }
6016    
6017     if (defined $element_state->{date_value}->{min} or
6018     defined $element_state->{date_value}->{max}) {
6019     my $min_value = $element_state->{date_value}->{min};
6020     my $max_value = $element_state->{date_value}->{max};
6021     my $value_value = $element_state->{date_value}->{value};
6022    
6023     if (defined $min_value and $min_value eq '' and
6024     (defined $max_value or defined $value_value)) {
6025     my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6026     $self->{onerror}->(node => $min,
6027     type => 'date value not supported', ## TODOC: type
6028     value => $min->value,
6029     level => $self->{level}->{unsupported});
6030     undef $min_value;
6031     }
6032     if (defined $max_value and $max_value eq '' and
6033     (defined $max_value or defined $value_value)) {
6034     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6035     $self->{onerror}->(node => $max,
6036     type => 'date value not supported', ## TODOC: type
6037     value => $max->value,
6038     level => $self->{level}->{unsupported});
6039     undef $max_value;
6040     }
6041     if (defined $value_value and $value_value eq '' and
6042     (defined $max_value or defined $min_value)) {
6043     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6044     $self->{onerror}->(node => $value,
6045     type => 'date value not supported', ## TODOC: type
6046     value => $value->value,
6047     level => $self->{level}->{unsupported});
6048     undef $value_value;
6049     }
6050    
6051     if (defined $min_value and defined $max_value) {
6052     if ($min_value->to_html5_number > $max_value->to_html5_number) {
6053     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6054     $self->{onerror}->(node => $max,
6055     type => 'max lt min', ## TODOC: type
6056     level => $self->{level}->{must});
6057     }
6058     }
6059    
6060     if (defined $min_value and defined $value_value) {
6061     if ($min_value->to_html5_number > $value_value->to_html5_number) {
6062     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6063     $self->{onerror}->(node => $value,
6064     type => 'value lt min', ## TODOC: type
6065     level => $self->{level}->{warn});
6066     ## NOTE: Not an error.
6067     }
6068     }
6069    
6070     if (defined $max_value and defined $value_value) {
6071     if ($max_value->to_html5_number < $value_value->to_html5_number) {
6072     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6073     $self->{onerror}->(node => $value,
6074     type => 'value gt max', ## TODOC: type
6075     level => $self->{level}->{warn});
6076     ## NOTE: Not an error.
6077     }
6078     }
6079     } elsif (defined $element_state->{number_value}->{min} or
6080     defined $element_state->{number_value}->{max}) {
6081     my $min_value = $element_state->{number_value}->{min};
6082     my $max_value = $element_state->{number_value}->{max};
6083     my $value_value = $element_state->{number_value}->{value};
6084    
6085     if (defined $min_value and defined $max_value) {
6086     if ($min_value > $max_value) {
6087     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6088     $self->{onerror}->(node => $max,
6089     type => 'max lt min', ## TODOC: type
6090     level => $self->{level}->{must});
6091     }
6092     }
6093    
6094     if (defined $min_value and defined $value_value) {
6095     if ($min_value > $value_value) {
6096     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6097     $self->{onerror}->(node => $value,
6098     type => 'value lt min', ## TODOC: type
6099     level => $self->{level}->{warn});
6100     ## NOTE: Not an error.
6101     }
6102     }
6103    
6104     if (defined $max_value and defined $value_value) {
6105     if ($max_value < $value_value) {
6106     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6107     $self->{onerror}->(node => $value,
6108     type => 'value gt max', ## TODOC: type
6109     level => $self->{level}->{warn});
6110     ## NOTE: Not an error.
6111     }
6112     }
6113     }
6114 wakaba 1.150
6115 wakaba 1.168 ## TODO: Warn unless value = min * x where x is an integer.
6116    
6117 wakaba 1.150 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6118     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6119     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6120     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6121     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6122 wakaba 1.140 },
6123 wakaba 1.66 check_start => sub {
6124     my ($self, $item, $element_state) = @_;
6125 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6126     $self->{onerror}->(node => $item->{node},
6127     type => 'multiple labelable fae',
6128     level => $self->{level}->{must});
6129     } else {
6130     $self->{flag}->{has_labelable} = 2;
6131     }
6132 wakaba 1.138
6133     $element_state->{id_type} = 'labelable';
6134 wakaba 1.66 },
6135 wakaba 1.52 };
6136    
6137 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
6138    
6139 wakaba 1.80 ## NOTE: "authors who are nesting repetition blocks should position such
6140     ## [repetition-block-related] buttons carefully to make clear which block a
6141 wakaba 1.150 ## button applies to." [WF2]: I have no idea how this can be tested.
6142 wakaba 1.80
## Definition of the HTML |button| element: the phrasing content
## checker, plus the attribute checkers, the attribute status table,
## and a |check_start| hook that records the element as labelable.
$Element->{$HTML_NS}->{button} = {
  %HTMLPhrasingContentChecker, ## ISSUE: -interactive?
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Attribute name => checker.
    accesskey => $HTMLAccesskeyAttrChecker,
    ## ISSUE: In HTML5, no "MUST NOT" for using |action|, |method|,
    ## |enctype|, |target|, and |novalidate| with non-|submit|-|type|
    ## |button| elements.
    action => $HTMLURIAttrChecker,
    autofocus => $AutofocusAttrChecker,
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    enctype => $GetHTMLEnumeratedAttrChecker->({
      'application/x-www-form-urlencoded' => 1,
      'multipart/form-data' => 1,
      'text/plain' => 1,
    }),
    form => $HTMLFormAttrChecker,
    method => $GetHTMLEnumeratedAttrChecker->({
      get => 1,
      post => 1,
      put => 1,
      delete => 1,
    }),
    name => $FormControlNameAttrChecker,
    novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
    replace => $GetHTMLEnumeratedAttrChecker->({
      document => 1,
      values => 1,
    }),
    target => $HTMLTargetAttrChecker,
    ## NOTE: According to Web Forms 2.0, |button| attribute has
    ## |template| attribute to support the |add| button type (as part
    ## of repetition template feature).  It conflicts with the
    ## |template| global attribute introduced as part of the data
    ## template feature.
    ## NOTE: |template| attribute as defined in Web Forms 2.0 has no
    ## author requirement.
    type => $GetHTMLEnumeratedAttrChecker->({
      button => 1,
      submit => 1,
      reset => 1,
    }),
    value => sub { }, ## NOTE: No restriction.
  }, {
    ## Attribute name => feature status.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey    => FEATURE_M12N10_REC,
    action       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    autofocus    => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    disabled     => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    enctype      => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form         => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    method       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    novalidate   => FEATURE_HTML5_DEFAULT,
    onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput  => FEATURE_WF2_INFORMATIVE,
    replace      => FEATURE_WF2,
    tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    template     => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO: dropped
    type         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    value        => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Within a |label| that already has its labelable element (or a
    ## |for| attribute), another labelable element is an error;
    ## otherwise, flag that a labelable element has been seen (2).
    if (not $self->{flag}{has_label} or not $self->{flag}{has_labelable}) {
      $self->{flag}{has_labelable} = 2;
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}{must});
    }

    ## ISSUE: "The value attribute must not be present unless the form
    ## [content] attribute is present.": Wrong?

    ## Kinds of URI carried by the URI-valued attributes.
    $element_state->{uri_info}{action}{type}{action} = 1;
    for my $uri_attr (qw(datasrc template ref)) {
      $element_state->{uri_info}{$uri_attr}{type}{resource} = 1;
    }

    $element_state->{id_type} = 'labelable';
  },
};
6227    
## Definition of the HTML |label| element.  In addition to the phrasing
## content checks, the |check_start| / |check_end| hooks maintain the
## |has_label| and |has_labelable| flags, which are consulted by the
## |check_start| hooks of labelable elements.
$Element->{$HTML_NS}->{label} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC |
      FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->({
    accesskey => $HTMLAccesskeyAttrChecker,
    for => sub {
      my ($self, $attr) = @_;

      ## NOTE: MUST be an ID of a labelable element.  The reference is
      ## only queued in |$self->{idref}| here.
      push @{ $self->{idref} }, ['labelable', $attr->value, $attr];
    },
    form => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
    for       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    form      => FEATURE_HTML5_DEFAULT,
    lang      => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    onblur    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## A |label| element is not allowed within a |label| element.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {label => 1}});

    ## Save the flags of the context, then start afresh.
    ## |has_labelable| is 1 if the |for| attribute is specified,
    ## 0 otherwise.
    $element_state->{has_label_original} = $self->{flag}{has_label};
    $self->{flag}{has_label} = 1;
    $element_state->{has_labelable_original} = $self->{flag}{has_labelable};
    my $has_for = $item->{node}->has_attribute_ns (undef, 'for');
    $self->{flag}{has_labelable} = $has_for ? 1 : 0;

    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## has for="" but no labelable
    if ($self->{flag}{has_labelable} == 1) {
      $self->{flag}{has_labelable} = $element_state->{has_labelable_original};
    }
    if (not $element_state->{has_label_original}) {
      delete $self->{flag}{has_label};
    }
    ## TODO: Warn if no labelable descendant? <input type=hidden>?

    ## NOTE: |<label for=a><input id=a></label>| is non-conforming.

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
  ## TODO: Tests for <nest/> in <label>
};
6283    
## Definition of the HTML |select| element.  Content model:
## (option | optgroup)*, with no significant text.
$Element->{$HTML_NS}->{select} = {
  %HTMLChecker,
  ## ISSUE: HTML5 has no requirement like these:
  ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE
  ## case [HTML4].
  ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is
  ## "error" [HTML4]
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->({
    accesskey => $HTMLAccesskeyAttrChecker,
    autofocus => $AutofocusAttrChecker,
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    form => $HTMLFormAttrChecker,
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
    name => $FormControlNameAttrChecker,
    ## TODO: tests for on*
    onformchange => $HTMLEventHandlerAttrChecker,
    onforminput => $HTMLEventHandlerAttrChecker,
    oninput => $HTMLEventHandlerAttrChecker,
    oninvalid => $HTMLEventHandlerAttrChecker,
    size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey    => FEATURE_WF2,
    autofocus    => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    data         => FEATURE_WF2,
    datafld      => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc      => FEATURE_HTML4_REC_RESERVED,
    disabled     => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    form         => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    multiple     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    name         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput  => FEATURE_WF2_INFORMATIVE,
    onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninput      => FEATURE_WF2,
    oninvalid    => FEATURE_WF2,
    sdaform      => FEATURE_HTML20_RFC,
    sdapref      => FEATURE_HTML20_RFC,
    size         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Within a |label| that already has its labelable element (or a
    ## |for| attribute), another labelable element is an error;
    ## otherwise, flag that a labelable element has been seen (2).
    if (not $self->{flag}{has_label} or not $self->{flag}{has_labelable}) {
      $self->{flag}{has_labelable} = 2;
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}{must});
    }

    for my $uri_attr (qw(data datasrc template ref)) {
      $element_state->{uri_info}{$uri_attr}{type}{resource} = 1;
    }

    $element_state->{id_type} = 'labelable';
  },
  check_child_element => sub {
    ## NOTE: (option | optgroup)*

    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded in the current context.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
             ($child_nsuri eq $HTML_NS and
              {option => 1, optgroup => 1}->{$child_ln})) {
      ## Explicitly included in the current context, or one of the
      ## elements allowed by the content model.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only inter-element whitespace is allowed.
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  },
};
6378 wakaba 1.1
## Definition of the HTML |datalist| element.  Content model:
## phrasing | option*.  Which alternative is in effect is tracked by
## |$element_state->{phase}|; the first significant child decides it.
$Element->{$HTML_NS}->{datalist} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
  }, {
    %HTMLAttrStatus,
    data => FEATURE_WF2,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{phase} = 'any'; # any | phrasing | option

    for my $uri_attr (qw(data template ref)) {
      $element_state->{uri_info}{$uri_attr}{type}{resource} = 1;
    }

    $element_state->{id_type} = 'datalist';
  },
  ## NOTE: phrasing | option*
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $phase = $element_state->{phase};
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded in the current context.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included in the current context.
    } elsif ($phase eq 'phrasing') {
      unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:phrasing',
                           level => $self->{level}{must});
      }
    } elsif ($phase eq 'option') {
      unless ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}{must});
      }
    } elsif ($phase eq 'any') {
      ## The first child element decides the alternative.
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{phase} = 'phrasing';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $element_state->{phase} = 'option';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}{must});
      }
    } else {
      die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Significant text is fine in the |phrasing| phase, switches the
    ## |any| phase to |phrasing|, and is an error in the |option| phase.
    if ($has_significant and $element_state->{phase} ne 'phrasing') {
      if ($element_state->{phase} eq 'any') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{phase} ne 'phrasing') {
      ## NOTE: Since the content model explicitly allows a |datalist|
      ## element being empty, we don't raise "no significant content"
      ## error for this element when there is no element.  (We should
      ## raise an error for |<datalist><br></datalist>|, however.)
      ## NOTE: As a side-effect, when the |datalist| element only
      ## contains non-conforming content, then the |phase| flag has not
      ## changed from |any|, no "no significant content" error is raised
      ## neither.
      $HTMLChecker{check_end}->(@_);
    } elsif ($element_state->{has_significant}) {
      ## Propagate the fact to the parent.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'no significant content',
                         level => $self->{level}{should});
    }
  },
};
6478 wakaba 1.49
## Definition of the HTML |optgroup| element.  Only |option| child
## elements (and no significant text) are allowed, and the |label|
## attribute is required.
$Element->{$HTML_NS}->{optgroup} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub { }, # any value is accepted
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    label    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    ## The |label| attribute has to be specified.
    if (not $item->{node}->has_attribute_ns (undef, 'label')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'label',
                         level => $self->{level}{must});
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded in the current context.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
             ($child_nsuri eq $HTML_NS and $child_ln eq 'option')) {
      ## Explicitly included in the current context, or an |option|
      ## element.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only inter-element whitespace is allowed.
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  },
};
6527    
## Definition of the HTML |option| element: the text checker plus the
## attribute checkers and the attribute status table.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Attribute name => checker.
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub { }, ## NOTE: No restriction.
    ## ISSUE: Not a "boolean attribute"
    selected => $GetHTMLBooleanAttrChecker->('selected'),
    value => sub { }, ## NOTE: No restriction.
  }, {
    ## Attribute name => feature status.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    label    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform  => FEATURE_HTML20_RFC,
    sdapref  => FEATURE_HTML20_RFC,
    selected => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    value    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
};
6548 wakaba 1.49
6549 wakaba 1.52 $Element->{$HTML_NS}->{textarea} = {
6550     %HTMLTextChecker,
6551 wakaba 1.121 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6552 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6553 wakaba 1.164 accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
6554 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
6555 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6556 wakaba 1.164 cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6557 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6558 wakaba 1.136 form => $HTMLFormAttrChecker,
6559 wakaba 1.56 ## TODO: inputmode [WF2]
6560 wakaba 1.164 maxlength => sub {
6561     my ($self, $attr, $item, $element_state) = @_;
6562    
6563     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
6564    
6565 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
6566 wakaba 1.164 ## NOTE: Applying the rules for parsing non-negative integers
6567     ## results in a number.
6568     my $max_allowed_value_length = 0+$1;
6569    
6570     ## ISSUE: "The the purposes of this requirement," (typo)
6571    
6572     ## ISSUE: This constraint is applied w/o CRLF normalization to
6573     ## |value| attribute, but w/ CRLF normalization to
6574     ## concept-value.
6575     my $value = $item->{node}->text_content;
6576     if (defined $value) {
6577     my $codepoint_length = length $value;
6578    
6579     if ($codepoint_length > $max_allowed_value_length) {
6580     $self->{onerror}->(node => $item->{node},
6581     type => 'value too long',
6582     level => $self->{level}->{must});
6583     }
6584     }
6585     }
6586     },
6587 wakaba 1.165 name => $FormControlNameAttrChecker,
6588 wakaba 1.164 onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
6589     onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6590     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6591 wakaba 1.161 pattern => $PatternAttrChecker,
6592 wakaba 1.52 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6593 wakaba 1.56 required => $GetHTMLBooleanAttrChecker->('required'),
6594 wakaba 1.164 rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6595     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6596     oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
6597 wakaba 1.161 ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
6598 wakaba 1.56 wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
6599 wakaba 1.52 }, {
6600     %HTMLAttrStatus,
6601     %HTMLM12NCommonAttrStatus,
6602 wakaba 1.164 accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6603 wakaba 1.61 'accept-charset' => FEATURE_HTML2X_RFC,
6604 wakaba 1.52 accesskey => FEATURE_M12N10_REC,
6605 wakaba 1.121 autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6606     cols => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6607 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6608 wakaba 1.49 dataformatas => FEATURE_HTML4_REC_RESERVED,
6609     datasrc => FEATURE_HTML4_REC_RESERVED,
6610 wakaba 1.121 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6611     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6612 wakaba 1.164 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
6613 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6614 wakaba 1.121 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6615     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6616 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6617     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6618     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6619 wakaba 1.164 onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6620     onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6621     oninput => FEATURE_WF2, ## TODO: tests
6622     oninvalid => FEATURE_WF2, ## TODO: tests
6623 wakaba 1.52 onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6624 wakaba 1.161 pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6625 wakaba 1.121 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6626     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6627     rows => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6628 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6629     sdapref => FEATURE_HTML20_RFC,
6630 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6631 wakaba 1.121 wrap => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6632 wakaba 1.52 }),
6633 wakaba 1.66 check_start => sub {
6634     my ($self, $item, $element_state) = @_;
6635 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6636     $self->{onerror}->(node => $item->{node},
6637     type => 'multiple labelable fae',
6638     level => $self->{level}->{must});
6639     } else {
6640     $self->{flag}->{has_labelable} = 2;
6641     }
6642 wakaba 1.164
6643     $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6644     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6645     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6646    
6647     $element_state->{id_type} = 'labelable';
6648     },
6649     check_attrs2 => sub {
6650     my ($self, $item, $element_state) = @_;
6651 wakaba 1.66
6652 wakaba 1.161 if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6653     not $item->{node}->has_attribute_ns (undef, 'title')) {
6654     ## NOTE: WF2 (dropped by HTML5)
6655     $self->{onerror}->(node => $item->{node},
6656     type => 'attribute missing',
6657     text => 'title',
6658     level => $self->{level}->{should});
6659     }
6660    
6661 wakaba 1.164 unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
6662     my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
6663     if (defined $wrap) {
6664     $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
6665     if ($wrap eq 'hard') {
6666     $self->{onerror}->(node => $item->{node},
6667     type => 'attribute missing',
6668     text => 'cols',
6669     level => $self->{level}->{must});
6670     }
6671     }
6672     }
6673 wakaba 1.66 },
6674 wakaba 1.52 };
6675 wakaba 1.49
## |output| element: phrasing content.  The |for| attribute is an
## unordered set of unique space-separated tokens; every distinct token is
## recorded in |$self->{idref}| and every repeated token is reported.
$Element->{$HTML_NS}{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    for => sub {
      my ($checker, $for_attr) = @_;

      ## NOTE: "Unordered set of unique space-separated tokens".

      ## Empty fields (produced by leading separators) are skipped.
      my %seen;
      for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $for_attr->value) {
        next unless length $token;
        if ($seen{$token}) {
          $checker->{onerror}->(node => $for_attr, type => 'duplicate token',
                                value => $token,
                                level => $checker->{level}->{must});
        } else {
          $seen{$token} = 1;
          push @{$checker->{idref}}, ['any', $token, $for_attr];
        }
      }
    },
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
  }, {
    %HTMLAttrStatus,
    for => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  }),
};
6712    
## |isindex| element: an empty element, deprecated in HTML4.  Only the
## |prompt| attribute has a (no-op) checker of its own.
$Element->{$HTML_NS}{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->({
    prompt => sub {}, ## NOTE: Text [M12N]
  }, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($checker, $entry, $state) = @_;

    ## Classify the URI-bearing attributes of this element.
    $state->{uri_info}{action}{type}{action} = 1;
    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
};
6740 wakaba 1.49
## Checker definition for the HTML |script| element.
##
## - With a |src| attribute the element must be empty: child elements and
##   significant text are reported.
## - Without |src| the script type is derived from |type| / |language|,
##   the text content is accumulated, and |check_end| either reports an
##   "XML script lang" error (uncertain level) or passes the text to the
##   |onsubdoc| callback.
6741 wakaba 1.1 $Element->{$HTML_NS}->{script} = {
6742 wakaba 1.40 %HTMLChecker,
6743 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
6744 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
## |charset| is reported as not allowed unless |src| is also present; its
## value is then handed to $HTMLCharsetChecker in either case.
6745 wakaba 1.91 charset => sub {
6746     my ($self, $attr) = @_;
6747    
6748     unless ($attr->owner_element->has_attribute_ns (undef, 'src')) {
6749     $self->{onerror}->(type => 'attribute not allowed',
6750     node => $attr,
6751 wakaba 1.104 level => $self->{level}->{must});
6752 wakaba 1.91 }
6753    
6754     $HTMLCharsetChecker->($attr->value, @_);
6755     },
6756 wakaba 1.86 language => sub {}, ## NOTE: No syntax constraint according to HTML4.
6757 wakaba 1.91 src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
6758 wakaba 1.1 defer => $GetHTMLBooleanAttrChecker->('defer'),
6759     async => $GetHTMLBooleanAttrChecker->('async'),
6760 wakaba 1.91 type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
6761 wakaba 1.49 }, {
6762     %HTMLAttrStatus,
6763 wakaba 1.153 async => FEATURE_HTML5_WD,
6764     charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
6765     defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
6766 wakaba 1.49 event => FEATURE_HTML4_REC_RESERVED,
6767     for => FEATURE_HTML4_REC_RESERVED,
6768 wakaba 1.154 href => FEATURE_RDFA_REC,
6769 wakaba 1.153 id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6770 wakaba 1.49 language => FEATURE_M12N10_REC_DEPRECATED,
6771 wakaba 1.153 src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
6772     type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
6773 wakaba 1.9 }),
## Decides between "external script" (|must_be_empty|) and "inline
## script" (|script_type| computed), and starts the text accumulator.
6774 wakaba 1.40 check_start => sub {
6775     my ($self, $item, $element_state) = @_;
6776 wakaba 1.1
6777 wakaba 1.40 if ($item->{node}->has_attribute_ns (undef, 'src')) {
6778     $element_state->{must_be_empty} = 1;
6779 wakaba 1.1 } else {
6780     ## NOTE: No content model conformance in HTML5 spec.
6781 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
6782     my $language = $item->{node}->get_attribute_ns (undef, 'language');
## Type selection, in priority order: an empty |type| or empty
## |language| means text/javascript; else |type| as given; else
## "text/" followed by |language|; else text/javascript.
6783 wakaba 1.1 if ((defined $type and $type eq '') or
6784     (defined $language and $language eq '')) {
6785     $type = 'text/javascript';
6786     } elsif (defined $type) {
6787     #
6788     } elsif (defined $language) {
6789     $type = 'text/' . $language;
6790     } else {
6791     $type = 'text/javascript';
6792     }
6793 wakaba 1.93
## If the value parses as "type/subtype" (optionally followed by ";" and
## parameters), keep only the lowercased "type/subtype" part.
6794     if ($type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
6795     $type = "$1/$2";
6796     $type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
6797     ## TODO: Though we strip parameter here, it should not be ignored for the purpose of conformance checking...
6798     }
6799     $element_state->{script_type} = $type;
6800 wakaba 1.40 }
6801 wakaba 1.66
6802     $element_state->{uri_info}->{src}->{type}->{resource} = 1;
6803 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6804     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6805 wakaba 1.107
6806     $element_state->{text} = '';
6807 wakaba 1.40 },
## Child elements are only an error here when they are excluded
## ("minus") interactive content, or when the element must be empty.
6808     check_child_element => sub {
6809     my ($self, $item, $child_el, $child_nsuri, $child_ln,
6810     $child_is_transparent, $element_state) = @_;
6811 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
6812     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
6813 wakaba 1.40 $self->{onerror}->(node => $child_el,
6814     type => 'element not allowed:minus',
6815 wakaba 1.104 level => $self->{level}->{must});
6816 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
6817     #
6818     } else {
6819     if ($element_state->{must_be_empty}) {
6820     $self->{onerror}->(node => $child_el,
6821 wakaba 1.104 type => 'element not allowed:empty',
6822     level => $self->{level}->{must});
6823 wakaba 1.40 }
6824     }
6825     },
## Text is always appended to the accumulator, even when it has just
## been reported as not allowed.
6826     check_child_text => sub {
6827     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
6828     if ($has_significant and
6829     $element_state->{must_be_empty}) {
6830     $self->{onerror}->(node => $child_node,
6831 wakaba 1.104 type => 'character not allowed:empty',
6832     level => $self->{level}->{must});
6833 wakaba 1.40 }
6834 wakaba 1.115 $element_state->{text} .= $child_node->data;
6835 wakaba 1.40 },
## NOTE(review): |$HTMLChecker{check_end}| is only reached for inline
## scripts (it sits inside the |unless| block) - confirm that skipping it
## for |src| scripts is intended.
6836     check_end => sub {
6837     my ($self, $item, $element_state) = @_;
6838     unless ($element_state->{must_be_empty}) {
6839 wakaba 1.93 if ($element_state->{script_type} =~ m![+/][Xx][Mm][Ll]\z!) {
6840     ## NOTE: XML content should be checked by THIS instance of checker
6841     ## as part of normal tree validation.
6842 wakaba 1.104 $self->{onerror}->(node => $item->{node},
6843     type => 'XML script lang',
6844     text => $element_state->{script_type},
6845     level => $self->{level}->{uncertain});
6846     ## ISSUE: Should we raise some kind of error for
6847     ## <script type="text/xml">aaaaa</script>?
6848     ## NOTE: ^^^ This is why we throw an "uncertain" error.
6849 wakaba 1.93 } else {
6850     $self->{onsubdoc}->({s => $element_state->{text},
6851     container_node => $item->{node},
6852     media_type => $element_state->{script_type},
6853     is_char_string => 1});
6854     }
6855 wakaba 1.40
6856     $HTMLChecker{check_end}->(@_);
6857 wakaba 1.1 }
6858     },
6859 wakaba 1.91 ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
6860     ## NOTE: "When used to include script data, the script data must be embedded
6861     ## inline, the format of the data must be given using the type attribute,
6862     ## and the src attribute must not be specified." - not testable.
6863     ## TODO: It would be possible to err <script type=text/plain src=...>
6864 wakaba 1.1 };
6865 wakaba 1.25 ## ISSUE: Significant check and text child node
6866 wakaba 1.1
6867     ## NOTE: When script is disabled.
## Checker definition for the HTML |noscript| element.
##
## Behaviour depends on |$self->{flag}->{in_head}|:
## - in |head|: only |link|, |style| without |scoped|, and |meta| with an
##   |http-equiv| other than "content-type" are accepted as children, and
##   significant text is an error;
## - elsewhere: the element is checked as transparent content, with
##   nested |noscript| added to the excluded ("minus") elements.
6868     $Element->{$HTML_NS}->{noscript} = {
6869 wakaba 1.40 %HTMLTransparentChecker,
6870 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
6871 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
6872     %HTMLAttrStatus,
6873     %HTMLM12NCommonAttrStatus,
6874 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6875 wakaba 1.49 }),
6876 wakaba 1.40 check_start => sub {
6877     my ($self, $item, $element_state) = @_;
6878 wakaba 1.3
## |noscript| is reported when the owner document is not an HTML document.
6879 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
6880 wakaba 1.104 $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript',
6881     level => $self->{level}->{must});
6882 wakaba 1.3 }
6883    
6884 wakaba 1.40 unless ($self->{flag}->{in_head}) {
6885     $self->_add_minus_elements ($element_state,
6886     {$HTML_NS => {noscript => 1}});
6887     }
6888 wakaba 1.79
6889     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6890     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6891 wakaba 1.3 },
6892 wakaba 1.40 check_child_element => sub {
6893     my ($self, $item, $child_el, $child_nsuri, $child_ln,
6894     $child_is_transparent, $element_state) = @_;
6895     if ($self->{flag}->{in_head}) {
6896 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
6897     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
6898 wakaba 1.40 $self->{onerror}->(node => $child_el,
6899     type => 'element not allowed:minus',
6900 wakaba 1.104 level => $self->{level}->{must});
6901 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
6902     #
6903     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
6904     #
6905     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
## A |style| child is only rejected when it carries |scoped|.
6906     if ($child_el->has_attribute_ns (undef, 'scoped')) {
6907     $self->{onerror}->(node => $child_el,
6908     type => 'element not allowed:head noscript',
6909 wakaba 1.104 level => $self->{level}->{must});
6910 wakaba 1.40 }
6911     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
## A |meta| child needs an |http-equiv| attribute whose lowercased value
## is not "content-type"; any other |meta| is rejected.
6912 wakaba 1.47 my $http_equiv_attr
6913     = $child_el->get_attribute_node_ns (undef, 'http-equiv');
6914     if ($http_equiv_attr) {
6915     ## TODO: case
6916     if (lc $http_equiv_attr->value eq 'content-type') {
6917 wakaba 1.40 $self->{onerror}->(node => $child_el,
6918 wakaba 1.34 type => 'element not allowed:head noscript',
6919 wakaba 1.104 level => $self->{level}->{must});
6920 wakaba 1.47 } else {
6921     #
6922 wakaba 1.3 }
6923 wakaba 1.47 } else {
6924     $self->{onerror}->(node => $child_el,
6925     type => 'element not allowed:head noscript',
6926 wakaba 1.104 level => $self->{level}->{must});
6927 wakaba 1.3 }
6928 wakaba 1.40 } else {
6929     $self->{onerror}->(node => $child_el,
6930     type => 'element not allowed:head noscript',
6931 wakaba 1.104 level => $self->{level}->{must});
6932 wakaba 1.40 }
6933     } else {
6934     $HTMLTransparentChecker{check_child_element}->(@_);
6935     }
6936     },
6937     check_child_text => sub {
6938     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
6939     if ($self->{flag}->{in_head}) {
6940     if ($has_significant) {
6941     $self->{onerror}->(node => $child_node,
6942 wakaba 1.104 type => 'character not allowed',
6943     level => $self->{level}->{must});
6944 wakaba 1.3 }
6945     } else {
6946 wakaba 1.40 $HTMLTransparentChecker{check_child_text}->(@_);
6947     }
6948     },
## NOTE(review): |_remove_minus_elements| is called unconditionally
## although |_add_minus_elements| only ran outside |head| - presumably
## the removal is a no-op when nothing was added; confirm.
6949     check_end => sub {
6950     my ($self, $item, $element_state) = @_;
6951     $self->_remove_minus_elements ($element_state);
6952     if ($self->{flag}->{in_head}) {
6953     $HTMLChecker{check_end}->(@_);
6954     } else {
6955     $HTMLPhrasingContentChecker{check_end}->(@_);
6956 wakaba 1.3 }
6957 wakaba 1.1 },
6958     };
6959 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
6960 wakaba 1.1
## |event-source| element (hyphenated spelling, status "LC dropped"): an
## empty element whose only element-specific attribute is the URI-valued
## |src|.
$Element->{$HTML_NS}{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->(
    { src => $HTMLURIAttrChecker },
    { %HTMLAttrStatus, src => FEATURE_HTML5_LC_DROPPED },
  ),
  check_start => sub {
    my ($checker, $entry, $state) = @_;

    ## Classify the URI-bearing attributes of this element.
    $state->{uri_info}{src}{type}{resource} = 1;
    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
};
6978    
## |eventsource| element (unhyphenated spelling, HTML5 WD status): an
## empty element whose only element-specific attribute is the URI-valued
## |src|.
$Element->{$HTML_NS}{eventsource} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    { src => $HTMLURIAttrChecker },
    { %HTMLAttrStatus, src => FEATURE_HTML5_WD },
  ),
  check_start => sub {
    my ($checker, $entry, $state) = @_;

    ## Classify the URI-bearing attributes of this element.
    $state->{uri_info}{src}{type}{resource} = 1;
    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
};
6996    
## |details| element: starts from a copy of the |fieldset| definition and
## overrides its status and attribute checker (boolean |open| only).
$Element->{$HTML_NS}{details} = {
  %{ $Element->{$HTML_NS}{fieldset} },
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    { 'open' => $GetHTMLBooleanAttrChecker->('open') },
    { %HTMLAttrStatus, 'open' => FEATURE_HTML5_LC },
  ),
};
7007    
## Checker definition for the HTML |datagrid| element.
##
## The content model is tracked with |$element_state->{phase}|:
## - 'any'  : nothing decisive seen yet (initial value);
## - 'none' : a leading |table|, |select| or |datalist| was seen, after
##            which no further child element or significant text is
##            allowed;
## - 'flow' : flow content (an element or significant text) was seen
##            first; further flow content is allowed, but
##            |table|/|select|/|datalist| may not be the first element.
7008     $Element->{$HTML_NS}->{datagrid} = {
7009 wakaba 1.72 %HTMLFlowContentChecker,
7010 wakaba 1.48 status => FEATURE_HTML5_WD,
7011 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
7012 wakaba 1.1 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
7013     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
7014 wakaba 1.50 }, {
7015     %HTMLAttrStatus,
7016     disabled => FEATURE_HTML5_WD,
7017     multiple => FEATURE_HTML5_WD,
7018 wakaba 1.1 }),
7019 wakaba 1.40 check_start => sub {
7020     my ($self, $item, $element_state) = @_;
7021 wakaba 1.1
## |a| and nested |datagrid| are excluded while inside this element.
7022 wakaba 1.40 $self->_add_minus_elements ($element_state,
7023     {$HTML_NS => {a => 1, datagrid => 1}});
7024     $element_state->{phase} = 'any';
7025 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
7026     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
7027 wakaba 1.40 },
7028 wakaba 1.95 ## NOTE: Flow -(text* (table|select|datalist) Flow*) | table | select |
7029     ## datalist | Empty
7030 wakaba 1.40 check_child_element => sub {
7031     my ($self, $item, $child_el, $child_nsuri, $child_ln,
7032     $child_is_transparent, $element_state) = @_;
7033 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
7034     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
7035 wakaba 1.40 $self->{onerror}->(node => $child_el,
7036     type => 'element not allowed:minus',
7037 wakaba 1.104 level => $self->{level}->{must});
7038 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
7039     #
7040 wakaba 1.72 } elsif ($element_state->{phase} eq 'flow') {
7041     if ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
## In the 'flow' phase with no element seen so far (i.e. only text came
## first), a |table|, |select| or |datalist| as the first element is an
## error.
7042 wakaba 1.44 if (not $element_state->{has_element} and
7043 wakaba 1.40 $child_nsuri eq $HTML_NS and
7044 wakaba 1.95 {
7045     table => 1, select => 1, datalist => 1,
7046     }->{$child_ln}) {
7047 wakaba 1.40 $self->{onerror}->(node => $child_el,
7048 wakaba 1.104 type => 'element not allowed',
7049     level => $self->{level}->{must});
7050 wakaba 1.40 } else {
7051 wakaba 1.8 #
7052 wakaba 1.1 }
7053 wakaba 1.40 } else {
7054     $self->{onerror}->(node => $child_el,
7055 wakaba 1.121 type => 'element not allowed', ## TODO: :flow
7056 wakaba 1.104 level => $self->{level}->{must});
7057 wakaba 1.40 }
7058 wakaba 1.43 $element_state->{has_element} = 1;
7059 wakaba 1.40 } elsif ($element_state->{phase} eq 'any') {
7060     if ($child_nsuri eq $HTML_NS and
7061     {table => 1, select => 1, datalist => 1}->{$child_ln}) {
7062     $element_state->{phase} = 'none';
7063 wakaba 1.72 } elsif ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
7064 wakaba 1.40 $element_state->{has_element} = 1;
7065 wakaba 1.72 $element_state->{phase} = 'flow';
7066 wakaba 1.40 } else {
7067     $self->{onerror}->(node => $child_el,
7068 wakaba 1.104 type => 'element not allowed',
7069     level => $self->{level}->{must});
7070 wakaba 1.40 }
7071     } elsif ($element_state->{phase} eq 'none') {
7072     $self->{onerror}->(node => $child_el,
7073 wakaba 1.104 type => 'element not allowed',
7074     level => $self->{level}->{must});
7075 wakaba 1.40 } else {
7076     die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
7077     }
7078     },
## Significant text moves 'any' to 'flow', is accepted in 'flow', and is
## an error in any other phase.
7079     check_child_text => sub {
7080     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
7081     if ($has_significant) {
7082 wakaba 1.72 if ($element_state->{phase} eq 'flow') {
7083 wakaba 1.40 #
7084     } elsif ($element_state->{phase} eq 'any') {
7085 wakaba 1.72 $element_state->{phase} = 'flow';
7086 wakaba 1.40 } else {
7087     $self->{onerror}->(node => $child_node,
7088 wakaba 1.104 type => 'character not allowed',
7089     level => $self->{level}->{must});
7090 wakaba 1.1 }
7091     }
7092 wakaba 1.40 },
7093     check_end => sub {
7094     my ($self, $item, $element_state) = @_;
7095     $self->_remove_minus_elements ($element_state);
7096 wakaba 1.1
7097 wakaba 1.95 if ($element_state->{phase} eq 'flow') {
7098     if ($element_state->{has_significant}) {
7099     $item->{real_parent_state}->{has_significant} = 1;
7100     } elsif ($item->{transparent}) {
7101     #
7102     } else {
7103     $self->{onerror}->(node => $item->{node},
7104 wakaba 1.104 type => 'no significant content',
7105 wakaba 1.110 level => $self->{level}->{should});
7106 wakaba 1.95 }
7107     } else {
7108     ## NOTE: Since the content model explicitly allows a |datagrid| element
7109     ## being empty, we don't raise "no significant content" error for this
7110     ## element when there is no element. (We should raise an error for
7111     ## |<datagrid><br></datagrid>|, however.)
7112     ## NOTE: As a side-effect, when the |datagrid| element only contains
7113     ## non-conforming content, then the |phase| flag has not changed from
7114     ## |any|, no "no significant content" error is raised either.
7115     ## NOTE: Another side-effect of the current implementation:
7116     ## |<datagrid><datagrid/></datagrid>| has no "no significant content"
7117     ## error at all.
7118 wakaba 1.40 $HTMLChecker{check_end}->(@_);
7119     }
7120     },
7121 wakaba 1.1 };
7122    
## |command| element: an empty element.  |type| must be exactly one of
## "command", "checkbox" or "radio" (the comparison is case-sensitive).
$Element->{$HTML_NS}{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    type => sub {
      my ($checker, $type_attr) = @_;
      my $keyword = $type_attr->value;
      my $is_known = {command => 1, checkbox => 1, radio => 1}->{$keyword};
      if (not $is_known) {
        $checker->{onerror}->(node => $type_attr,
                              type => 'invalid attribute value',
                              level => $checker->{level}->{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_WD,
    disabled => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($checker, $entry, $state) = @_;

    ## |icon| is marked as an embedded resource; the others as plain
    ## resources.
    $state->{uri_info}{icon}{type}{embedded} = 1;
    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
};
7159    
## |bb| element: phrasing content with all interactive content excluded
## while inside it.  |type| is an enumerated attribute whose only keyword
## is "makeapp".
$Element->{$HTML_NS}{bb} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    { type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1}) },
    { %HTMLAttrStatus, type => FEATURE_HTML5_WD },
  ),
  check_start => sub {
    my ($checker, $entry, $state) = @_;
    $checker->_add_minus_elements ($state, $HTMLInteractiveContent);

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_end => sub {
    my ($checker, $entry, $state) = @_;

    ## Undo the exclusion installed by |check_start|, then finish with the
    ## transparent checker's end handler (all original arguments passed on).
    $checker->_remove_minus_elements ($state);

    $HTMLTransparentChecker{check_end}->(@_);
  },
};
7183    
## Checker definition for the HTML |menu| element.
##
## The content is either a list of |li| elements or phrasing content,
## never a mixture.  |$element_state->{phase}| records which form has been
## chosen:
## - 'li or phrasing' : nothing decisive seen yet (initial value);
## - 'li'             : an |li| child was seen first;
## - 'phrasing'       : a phrasing element or significant text came first.
## While inside the element |$self->{flag}->{in_menu}| is set; the previous
## value is kept in |in_menu_original| so nested menus restore it properly.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
      ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## ISSUE: <menu id=""><p contextmenu=""> match?  (In the current
    ## implementation, it does not match.)
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_DROPPED,
    ## The key must be |compact| (the attribute name used by the checker
    ## table above); it was previously misspelled |compat|, which left the
    ## |compact| attribute without a status entry.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    $element_state->{id_type} = 'menu';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      ## An |li| child is only acceptable in (or selects) the 'li' phase.
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      ## A phrasing child is only acceptable in (or selects) the
      ## 'phrasing' phase.
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text counts as phrasing content.
    if ($has_significant) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Clear the flag only if it was not already set by an outer |menu|.
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
7276    
## |datatemplate| element: may contain only |rule| children (apart from
## elements explicitly allowed through |plus_elements|) and no significant
## text.  It may also serve as the root element of an XML document.
$Element->{$HTML_NS}{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($checker, $entry, $child, $child_ns, $child_name,
        $child_transparent, $state) = @_;

    ## Excluded interactive content is reported first.
    if ($checker->{minus_elements}->{$child_ns}->{$child_name} and
        $IsInHTMLInteractiveContent->($child, $child_ns, $child_name)) {
      $checker->{onerror}->(node => $child,
                            type => 'element not allowed:minus',
                            level => $checker->{level}->{must});
      return;
    }

    ## Explicitly allowed elements and |rule| are accepted silently.
    return if $checker->{plus_elements}->{$child_ns}->{$child_name};
    return if $child_ns eq $HTML_NS and $child_name eq 'rule';

    $checker->{onerror}->(node => $child,
                          type => 'element not allowed:datatemplate',
                          level => $checker->{level}->{must});
  },
  check_child_text => sub {
    my ($checker, $entry, $text_node, $is_significant, $state) = @_;
    return unless $is_significant;
    $checker->{onerror}->(node => $text_node, type => 'character not allowed',
                          level => $checker->{level}->{must});
  },
  is_xml_root => 1,
};
7307    
## |rule| element (child of |datatemplate|): its content is not checked.
## While inside it, |nest| is added to the allowed ("plus") elements and
## |$self->{flag}->{in_rule}| is set; the previous flag value is kept in
## |in_rule_original| so that nested rules restore it properly.
$Element->{$HTML_NS}{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    condition => $HTMLSelectorsAttrChecker,
    mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
  }, {
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
  check_start => sub {
    my ($checker, $entry, $state) = @_;

    $checker->_add_plus_elements ($state, {$HTML_NS => {nest => 1}});
    $state->{in_rule_original} = $checker->{flag}->{in_rule};
    $checker->{flag}->{in_rule} = 1;

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($checker, $entry, $state) = @_;

    $checker->_remove_plus_elements ($state);
    if (not $state->{in_rule_original}) {
      delete $checker->{flag}->{in_rule};
    }

    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
7343    
## Checker definition for the |nest| element.  It is built on top of
## the shared %HTMLEmptyChecker and is marked with the "at risk" HTML5
## status, as are its |filter| and |mode| attributes.
$Element->{$HTML_NS}{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Attribute name => value checker
      filter => $HTMLSelectorsAttrChecker,
      mode => sub {
        my ($self, $attr) = @_;

        ## The value has to be non-empty and must not contain any of
        ## U+0009, U+000A, U+000C, U+000D, or U+0020.
        my $mode_text = $attr->value;
        if ($mode_text !~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/) {
          $self->{onerror}->(
            node => $attr,
            type => 'mode:syntax error',
            level => $self->{level}{must},
          );
        }
      },
    },
    {
      ## Attribute name => feature status
      %HTMLAttrStatus,
      filter => FEATURE_HTML5_AT_RISK,
      mode => FEATURE_HTML5_AT_RISK,
    }
  ),
};
7363    
## Checker definition for the |legend| element.  It uses the shared
## %HTMLPhrasingContentChecker as its base.
{
  ## Attribute name => value checker
  my %attr_checker = (
    accesskey => $HTMLAccesskeyAttrChecker,
    ## Value checking for |align| is currently disabled:
    # align => $GetHTMLEnumeratedAttrChecker->({
    #   top => 1, bottom => 1, left => 1, right => 1,
    # }),
    form => $HTMLFormAttrChecker,
  );

  ## Attribute name => feature status.  The explicit entries come after
  ## the shared tables so that they take precedence over them.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
    form => FEATURE_HTML5_DROPPED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{legend} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
7382    
## Checker definition for the |div| element.  It uses the shared
## %HTMLFlowContentChecker as its base.
{
  ## Attribute name => value checker
  my %attr_checker = (
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  );

  ## Attribute name => feature status.  The explicit entries come after
  ## the shared tables so that they take precedence over them.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{div} = {
    %HTMLFlowContentChecker,
    status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($self, $item, $element_state) = @_;

      ## Mark the |datasrc|, |template|, and |ref| attributes as
      ## "resource" typed in the element's URI information.
      $element_state->{uri_info}{datasrc}{type}{resource} = 1;
      $element_state->{uri_info}{template}{type}{resource} = 1;
      $element_state->{uri_info}{ref}{type}{resource} = 1;
    },
  };
}
7407    
## Checker definition for the |center| element.  It uses the shared
## %HTMLFlowContentChecker as its base, has the deprecated M12N 1.0 REC
## status, and defines no attribute value checkers of its own.
$Element->{$HTML_NS}{center} = {
  %HTMLFlowContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      ## Attribute name => feature status
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    }
  ),
};
7417    
## Checker definition for the |font| element.  It uses the shared
## %HTMLTransparentChecker as its base; the element is dropped from
## HTML5 and deprecated in M12N 1.0.
$Element->{$HTML_NS}{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |size|, |color|, |face|
    },
    {
      ## Attribute name => feature status
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      color => FEATURE_M12N10_REC_DEPRECATED,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      face => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    }
  ),
  ## NOTE: While the |font| element was defined in the HTML5
  ## specification, it was allowed only in a document with the WYSIWYG
  ## signature.  The checker does not check whether the signature is
  ## present, since the signature has been dropped as well and has never
  ## been implemented.  (In addition, an "element not defined" error is
  ## raised for any |font| element anyway, so that no additional error
  ## has to be raised here.)
};
7442 wakaba 1.49
## Checker definition for the |basefont| element.  It uses the shared
## %HTMLEmptyChecker as its base and has the deprecated M12N 1.0 REC
## status.
$Element->{$HTML_NS}{basefont} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: color, face, size
    },
    {
      ## Attribute name => feature status
      %HTMLAttrStatus,
      color => FEATURE_M12N10_REC_DEPRECATED,
      face => FEATURE_M12N10_REC_DEPRECATED,
      ## Alternative status for |id|, currently not in use:
      #id => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
    }
  ),
};
7457    
7458 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
7459     ## class title id cols rows onload onunload style(x10)
7460     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7461     ## noframes Common, lang(xhtml10)
7462    
7463 wakaba 1.100 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7464 wakaba 1.56
7465 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7466     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7467     ## xmp, listing sdapref [HTML 2.0]
7468    
7469 wakaba 1.56 =pod
7470    
7471 wakaba 1.61 HTML 2.0 nextid @n
7472    
7473     RFC 2659: CERTS CRYPTOPTS
7474    
7475     ISO-HTML: pre-html, divN
7476 wakaba 1.82
7477     XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7478     di (Common), nl (Common), handler (Common, type), standby (Common),
7479     summary (Common)
7480    
7481 wakaba 1.97 Access & XHTML2: access (LC)
7482 wakaba 1.82
7483     XML Events & XForms (for XHTML2 support; very, very low priority)
7484 wakaba 1.61
7485 wakaba 1.56 =cut
7486 wakaba 1.61
7487     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7488     ## We added them only to |a|. |link| and |form| might also allow them
7489     ## in theory.
7490 wakaba 1.1
## Record in the shared namespace table that the definitions for the
## HTML namespace have been loaded.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

## A Perl module file has to end with a true value.
1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24