/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.191 - (hide annotations) (download)
Sun Aug 16 07:42:07 2009 UTC (15 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.190: +12 -0 lines
++ whatpm/t/ChangeLog	16 Aug 2009 07:32:33 -0000
	* ContentChecker.t: Added a new test data file.

2009-08-16  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/t/dom-conformance/ChangeLog	16 Aug 2009 07:33:45 -0000
	* html-interactive-2.dat: New test data file.

2009-08-16  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	16 Aug 2009 07:33:26 -0000
	* HTML.pm: defer="" w/o src="" is no longer allowed (HTML5
	revision 3550).

2009-08-16  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.117 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6    
7 wakaba 1.1 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8    
9 wakaba 1.174 ## --- Feature Status ---
10    
## HTML5 feature statuses.  Each constant is built from one of the
## FEATURE_STATUS_* flags of Whatpm::ContentChecker; constants for
## features that are still part of HTML5 additionally carry the
## FEATURE_ALLOWED flag, while the *_DROPPED constants do not.

sub FEATURE_HTML5_REC () {
  ## Part of HTML5, in the "implemented" status.
  ##
  ## Strictly speaking, HTML5's "implemented and widely deployed"
  ## status does not necessarily satisfy the condition for
  ## FEATURE_STATUS_REC, since there are no test cases for most of the
  ## features marked as "implemented" in HTML5.  Nevertheless, this
  ## HTML5 status is special-cased as if it had passed the CR phase,
  ## considering HTML's history.
  Whatpm::ContentChecker::FEATURE_STATUS_REC () |
  Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

sub FEATURE_HTML5_CR () {
  ## Part of HTML5, in the "awaiting implementation feedback" status.
  Whatpm::ContentChecker::FEATURE_STATUS_CR () |
  Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

sub FEATURE_HTML5_LC () {
  ## Part of HTML5, in the "last call for comments" status.
  Whatpm::ContentChecker::FEATURE_STATUS_LC () |
  Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

sub FEATURE_HTML5_AT_RISK () {
  ## Part of HTML5, but in the "being considered for removal" status.
  Whatpm::ContentChecker::FEATURE_STATUS_WD () |
  Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

sub FEATURE_HTML5_WD () {
  ## Part of HTML5, in the "working draft" status.
  Whatpm::ContentChecker::FEATURE_STATUS_WD () |
  Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

sub FEATURE_HTML5_FD () {
  ## Part of HTML5, in the "first draft" status.
  Whatpm::ContentChecker::FEATURE_STATUS_WD () |
  Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

sub FEATURE_HTML5_DEFAULT () {
  ## Part of HTML5, but without any status annotation.
  Whatpm::ContentChecker::FEATURE_STATUS_WD () |
  Whatpm::ContentChecker::FEATURE_ALLOWED ()
}

sub FEATURE_HTML5_DROPPED () {
  ## Was part of HTML5, in a status before the last call for comments,
  ## but was then dropped.
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}

sub FEATURE_HTML5_LC_DROPPED () {
  ## Was part of HTML5, in the last call for comments status, but was
  ## then dropped.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}
65 wakaba 1.154
## Feature statuses for Web Forms 2.0 (WF2), RDFa, XHTML Role and
## XHTML 2.0.  None of these constants carries FEATURE_ALLOWED.

sub FEATURE_WF2X () {
  ## Defined in WF2 (whether deprecated or not) and then incorporated
  ## into the HTML5 spec.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}

sub FEATURE_WF2 () {
  ## Features introduced or modified in WF2 which were not merged into
  ## HTML5.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}

sub FEATURE_WF2_INFORMATIVE () {
  ## Features mentioned in WF2's informative appendix A which were not
  ## merged into HTML5.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}

sub FEATURE_RDFA_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
}

sub FEATURE_RDFA_LC_DROPPED () {
  ## A feature that was defined in an RDFa last call working draft but
  ## was then dropped.
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}

## NOTE: The XHTML Role LCWD has almost no information on how the
## |role| attribute can be used - the only requirement on that matter
## is: "the attribute MUST be referenced using its namespace-qualified
## form" (and this is a host language conformance requirement!).
sub FEATURE_ROLE_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC ()
}

sub FEATURE_XHTML2_ED () {
  ## XHTML 2.0 Editor's Draft, in which the namespace URI is
  ## "http://www.w3.org/1999/xhtml".
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}
104 wakaba 1.58
## Feature statuses for XHTML Basic 1.1, Ruby Annotation, XHTML
## Modularization (M12N) and XHTML 1.0.
##
## NOTE(review): Some constant names do not match the status flag they
## return (for example, |FEATURE_RUBY_REC| and |FEATURE_XHTML10_REC|
## return FEATURE_STATUS_CR).  The values are reproduced as they are;
## confirm the intent before "correcting" either side.

sub FEATURE_XHTMLBASIC11_CR () {
  ## XHTML Basic 1.1 Recommendation, new features (not in XHTML
  ## M12N).
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
}

sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  ## XHTML Basic 1.1 Recommendation, new but deprecated features.
  Whatpm::ContentChecker::FEATURE_STATUS_REC () |
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO ()
}

sub FEATURE_RUBY_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}

sub FEATURE_M12N11_LC () {
  ## XHTML M12N 1.1 Recommendation, new features (not in 1.0).
  Whatpm::ContentChecker::FEATURE_STATUS_REC ();
}

## NOTE: The M12N10 status is based on its abstract module
## definition.  It contains a number of problems.  (However, again,
## it is a REC!)
sub FEATURE_M12N10_REC () {
  ## XHTML M12N 1.0 did pass the CR phase, despite those problems.
  Whatpm::ContentChecker::FEATURE_STATUS_REC ()
}

sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC () |
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO ()
}

## NOTE: The XHTML10 status is based on its transitional and frameset
## DTDs (second edition).  Only attributes missing from the M12N10
## abstract definition are added.
sub FEATURE_XHTML10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
143    
## Feature statuses for ISO-HTML, HTML 4, HTML 3.2, HTML 2.x and the
## related RFCs.

## NOTE: Diff from HTML4.
sub FEATURE_ISOHTML_PREPARATION () {
  ## Informative documentation.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}

## NOTE: The HTML4 status is based on its transitional and frameset
## DTDs (HTML 4.01).  Only attributes missing from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD ()
}

## TODO: According to the HTML4 definition, authors SHOULD use style
## sheets rather than presentational attributes (deprecated or not
## deprecated).

## NOTE: Diff from HTML4.
sub FEATURE_HTML32_REC_OBSOLETE () {
  ## The deprecation comes from a lowercase normative "should".
  Whatpm::ContentChecker::FEATURE_STATUS_CR () |
  Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD ()
}

sub FEATURE_RFC2659 () {
  ## Experimental RFC.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}

## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
sub FEATURE_HTML2X_RFC () {
  ## Proposed Standard, obsolete.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}

## NOTE: Diff from HTML 2.0.
sub FEATURE_RFC1942 () {
  ## Experimental RFC, obsolete.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}

## NOTE: Diff from HTML 3.2.
sub FEATURE_HTML20_RFC () {
  ## Proposed Standard, obsolete.
  Whatpm::ContentChecker::FEATURE_STATUS_CR ()
}
183 wakaba 1.58
184 wakaba 1.174 ## --- Content Model ---
185    
186 wakaba 1.29 ## December 2007 HTML5 Classification
187    
## Metadata content: {namespace URI => {local name => 1}}.
my $HTMLMetadataContent = do {
  ## NOTE: A |meta| element with no |name| attribute is not allowed as
  ## metadata content anywhere other than in the |head| element.
  my @html_names = qw(
    title base link style script noscript
    event-source eventsource
    command datatemplate
    meta
  );

  +{
    $HTML_NS => { map { ($_ => 1) } @html_names },

    ## NOTE: RDF is mentioned in the HTML5 spec.
    ## TODO: Other RDF elements?
    q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
  };
};
201    
## Flow content: {namespace URI => {local name => 1}}.
my $HTMLFlowContent = do {
  ## Elements listed here but not in the phrasing content table.
  ##
  ## ISSUE: |details| and |datagrid| are "Flow element" in the spec.
  ## ISSUE: |div| has no category in the spec.
  ## NOTE: |style| is only allowed if the |scoped| attribute is
  ## specified.  Additionally, it must be before any other element or
  ## non-inter-element-whitespace text node.
  my @flow_only = qw(
    section nav article blockquote aside
    h1 h2 h3 h4 h5 h6 header
    footer address p hr dialog pre
    ol ul dl figure map table
    form fieldset
    details datagrid
    datatemplate
    div
    style
  );

  ## Elements that are also listed in the phrasing content table.
  ##
  ## ISSUE: |datagrid| is "Interactive element" in the spec.
  ## NOTE: |area| is allowed only as a descendant of |map|.
  ## NOTE: |a|, |ins|, |del| and |font| are transparent.
  ## NOTE: |menu| is phrasing if there is a |menu| ancestor, and flow
  ## otherwise.
  my @phrasing = qw(
    br q cite em strong small mark
    dfn abbr time progress meter code
    var samp kbd sub sup span i
    b bdo ruby
    script noscript event-source eventsource
    command bb
    input button label select datalist
    textarea output
    datagrid
    area
    a ins del font
    menu
    img iframe embed object video audio
    canvas
  );

  +{
    $HTML_NS => { map { ($_ => 1) } @flow_only, @phrasing },

    ## NOTE: Embedded
    q<http://www.w3.org/1998/Math/MathML> => {math => 1},
    q<http://www.w3.org/2000/svg> => {svg => 1},
  };
};
244    
## Sectioning content: {namespace URI => {local name => 1}}.
##
## NOTE: |body| is only allowed in the |html| element.
my $HTMLSectioningContent = {
  $HTML_NS => { map { ($_ => 1) } qw(section nav article aside body) },
};
252    
## Sectioning roots: {namespace URI => {local name => 1}}.
my $HTMLSectioningRoot = {
  $HTML_NS => { map { ($_ => 1) } qw(blockquote datagrid figure td) },
};
258    
## Heading content: {namespace URI => {local name => 1}}.
my $HTMLHeadingContent = {
  $HTML_NS => { map { ($_ => 1) } qw(h1 h2 h3 h4 h5 h6 header) },
};
264    
## Phrasing content: {namespace URI => {local name => 1}}.
##
## NOTE: All phrasing content is also flow content.
## NOTE: Non-inter-element-whitespace text nodes are phrasing content
## too, although they cannot be represented in this table.
my $HTMLPhrasingContent = do {
  ## ISSUE: |datagrid| is "Interactive element" in the spec.
  ## NOTE: |area| is allowed only as a descendant of |map|.
  ## NOTE: |a|, |ins|, |del| and |font| are transparent.
  ## NOTE: |menu| is phrasing if there is a |menu| ancestor, and flow
  ## otherwise.
  my @html_names = qw(
    br q cite em strong small mark
    dfn abbr time progress meter code
    var samp kbd sub sup span i
    b bdo ruby
    script noscript event-source eventsource
    command bb
    input button label select datalist
    textarea output
    datagrid
    area
    a ins del font
    menu
    img iframe embed object video audio
    canvas
  );

  +{
    $HTML_NS => { map { ($_ => 1) } @html_names },

    ## NOTE: Embedded
    q<http://www.w3.org/1998/Math/MathML> => {math => 1},
    q<http://www.w3.org/2000/svg> => {svg => 1},
  };
};
296    
297 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
298 wakaba 1.29
## Interactive content: {namespace URI => {local name => 1}}.
my $HTMLInteractiveContent = do {
  my @always = qw(
    a
    label input button select textarea
    details datagrid bb
  );

  ## NOTE: Interactive when the "controls" attribute is specified.
  my @with_controls = qw(video audio);

  ## NOTE: Interactive when the "type=toolbar" attribute is specified.
  my @with_type_toolbar = qw(menu);

  +{
    $HTML_NS => {
      map { ($_ => 1) } @always, @with_controls, @with_type_toolbar
    },
  };
};
312    
## Labelable form-associated elements:
## {namespace URI => {local name => 1}}.
my $LabelableFAE = {
  $HTML_NS => { map { ($_ => 1) } qw(input button select textarea) },
};
319    
320 wakaba 1.130 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
321    
322 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
323     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
324    
325     ## -- Common attribute syntax checkers
326    
327 wakaba 1.1 our $AttrChecker;
328 wakaba 1.82 our $AttrStatus;
329 wakaba 1.1
## Returns a checker for an enumerated attribute.
##
## $states maps each known keyword, in lowercase, to a positive number
## if the keyword is conforming, or to a negative number if it is
## recognized but non-conforming: {value => conforming ? 1 : -1}.
##
## The returned checker takes ($self, $attr).  It reports
## |enumerated:non-conforming| for a recognized but non-conforming
## keyword and |enumerated:invalid| for any other unknown value, both
## at the "must" level, through |$self->{onerror}|.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;

    ## Keywords are compared ASCII case-insensitively.  |lc| would
    ## also fold non-ASCII characters (e.g. U+212A KELVIN SIGN to
    ## "k"), so only A-Z are mapped here.
    my $value = $attr->value;
    $value =~ tr/A-Z/a-z/;

    ## Unknown keywords have no entry; use 0 for them so that the
    ## numeric comparison below never sees |undef|.
    my $state = $states->{$value} || 0;
    return if $state > 0;

    $self->{onerror}->(node => $attr,
                       type => $state ? 'enumerated:non-conforming'
                                      : 'enumerated:invalid',
                       level => $self->{level}->{must});
  };
}; # $GetHTMLEnumeratedAttrChecker
346    
## Returns a checker for a boolean attribute whose name is
## $local_name (expected to be given in lowercase).
##
## The returned checker takes ($self, $attr).  The value is accepted
## if it is the empty string or an ASCII case-insensitive match for
## $local_name; otherwise |boolean:invalid| is reported at the "must"
## level through |$self->{onerror}|.
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;

    ## Only A-Z are folded.  |lc| would also fold non-ASCII characters
    ## (e.g. U+212A KELVIN SIGN to "k") and so accept values that are
    ## not ASCII case-insensitive matches.
    my $value = $attr->value;
    $value =~ tr/A-Z/a-z/;

    return if $value eq '' or $value eq $local_name;

    $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                       level => $self->{level}->{must});
  };
}; # $GetHTMLBooleanAttrChecker
358    
## Unordered set of unique space-separated tokens.
##
## Returns a checker taking ($self, $attr).  If $allowed_words is
## defined, it is a hash reference whose true-valued keys are the only
## tokens allowed; if it is |undef|, any token is allowed.  A repeated
## token is reported as |duplicate token| and a token outside the
## allowed set as |word not allowed|, both at the "must" level.
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my $allowed_words = shift;
  return sub {
    my ($self, $attr) = @_;

    ## Leading white space produces an empty first field, which is
    ## discarded.
    my @tokens = grep {length $_}
        split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

    my %seen;
    for my $token (@tokens) {
      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
        next;
      }
      $seen{$token} = 1;

      next if not defined $allowed_words;
      next if $allowed_words->{$token};

      $self->{onerror}->(node => $attr, type => 'word not allowed',
                         value => $token,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
385 wakaba 1.8
## |rel| attribute (set of space-separated tokens whose allowed values
## are defined by the section on link types).
##
## Arguments: ($a_or_area, $todo, $self, $attr, $item, $element_state).
## $a_or_area is used both as the index into each link type's |effect|
## array and as a boolean (presumably 0 for |link| and 1 for |a| and
## |area| - confirm against the callers).
##
## Side effects: may set |$todo->{has_hyperlink_link_type}|, the
## |hyperlink| and |resource| flags in
## |$element_state->{uri_info}->{href}->{type}| and
## |$self->{has_link_type}|; always sets |$element_state->{link_rel}|
## to the set of tokens found.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Collect the distinct tokens.  Only |up| may be repeated.
  my %word;
  for my $token (grep {length $_}
                 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
    }
  }
  ## NOTE: Case-sensitive match (since the HTML5 spec does not say
  ## link types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined
  ## values, "MAY"s and "only ... MAY" restrict non-standard
  ## non-registered values to be used conformingly.

  my $is_hyperlink;
  my $is_resource;
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $word (keys %word) {
    my $def = $LinkType->{$word};
    unless (defined $def) {
      $self->{onerror}->(node => $attr,
                         type => 'unknown link type',
                         value => $word,
                         level => $self->{level}->{uncertain});
      next;
    }

    ## What the link type does on this kind of element, if anything.
    my $effect = $def->{effect}->[$a_or_area];

    my $report = sub {
      my ($type, $level) = @_;
      $self->{onerror}->(node => $attr,
                         type => $type,
                         value => $word,
                         level => $self->{level}->{$level});
    };

    if ($def->{status} eq 'accepted') {
      $report->('link type:bad context', 'must') unless defined $effect;
    } elsif ($def->{status} eq 'proposal') {
      $report->('link type:proposed', 'should');
      $report->('link type:bad context', 'must') unless defined $effect;
    } else { # rejected or synonym
      $report->('link type:non-conforming', 'must');
    }

    ## Link types marked as unique may be used only once; the record
    ## is kept on $self.
    if ($def->{unique}) {
      if ($self->{has_link_type}->{$word}) {
        $report->('link type:duplicate', 'must');
      } else {
        $self->{has_link_type}->{$word} = 1;
      }
    }

    ## |alternate| is handled after the loop, since its effect depends
    ## on whether |stylesheet| is also specified.
    if (defined $effect and $word ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by
  ## HTML5, says that using both the X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and that if both appear they
  ## SHOULD contain exactly the same value.
  ## ISSUE: The Pingback 1.0 specification defines the exact
  ## representation of its link element, which cannot be tested by the
  ## current architecture.
  ## ISSUE: The Pingback 1.0 specification says that the document MUST
  ## NOT include any string that matches the pattern for the
  ## rel=pingback link, which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than the
  ## predefined four.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an
  ## error.
  ## NOTE: We can't check "If the page is part of multiple
  ## hierarchies, then they SHOULD be described in different
  ## paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
501 wakaba 1.20
502     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
503 wakaba 1.1
## URI (or IRI)
##
## Checker taking ($self, $attr, $item, $element_state).  It has the
## value checked as an IRI reference by |Whatpm::URIChecker|, forwards
## any error to |$self->{onerror}| with the attribute as |node|, sets
## |$self->{has_uri_attr}|, and records the attribute under its name
## in |$element_state->{uri_info}| and under its value in
## |$self->{return}->{uri}|.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Are relative references allowed?  (An RFC 3987 "IRI" is an
  ## absolute reference with an optional fragment identifier.)
  my $value = $attr->value;

  ## The error level table is the third argument of
  ## |check_iri_reference|, as in the other callers in this file.  It
  ## used to be written after the closing parenthesis of the call,
  ## where it was evaluated and discarded.
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
520    
## A space-separated list of one or more URIs (or IRIs).
##
## Checker taking ($self, $attr).  Every field of the value is checked
## as an IRI reference by |Whatpm::URIChecker|; errors are forwarded
## to |$self->{onerror}| together with the field (|value|), its
## zero-based position (|index|) and the attribute (|node|).  Every
## field is also recorded in |$self->{return}->{uri}|, and
## |$self->{has_uri_attr}| is set.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## How the URIs of each supported attribute are used.
  my %type_of = (
    ping => 'action',
    profile => 'namespace',
    archive => 'resource',
  );
  my $type = $type_of{$attr->name};

  my @uris = split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
  my $index = 0;
  for my $uri (@uris) {
    Whatpm::URIChecker->check_iri_reference ($uri, sub {
      $self->{onerror}->(value => $uri, @_, node => $attr, index => $index);
    }, $self->{level});

    ## TODO: absolute
    my $record = {node => $attr, type => {$type => 1}};
    push @{$self->{return}->{uri}->{$uri} ||= []}, $record;

    $index++;
  }
  ## ISSUE: Relative references?  (Especially in profile="".)
  ## ISSUE: Are leading or trailing white spaces conformant?
  ## ISSUE: Is a sequence of white space characters conformant?
  ## ISSUE: Is a zero-length string conformant?  (It does contain a
  ## relative reference, i.e. the same as the base URI.)
  ## ISSUE: What is "space"?
  ## NOTE: Duplication does not seem to be an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
549    
## Unanchored pattern for an e-mail address: two dot-separated
## sequences of |atext| runs joined by "@".  Callers have to add their
## own anchors.
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/;
  qr/$dot_atom\@$dot_atom/;
};
556    
## Valid global date and time.
##
## Returns a checker taking ($self, $attr, $item, $element_state).
## $type selects the |Message::Date| method used to parse the value
## (|parse_| followed by $type).  Parse errors other than |date value
## not supported| are forwarded to |$self->{onerror}| with the
## attribute as |node|.
##
## The parse result is stored in
## |$element_state->{date_value}->{attribute name}|.  If the result is
## false, the empty string is stored when an error has been forwarded,
## and |undef| otherwise.
my $GetDateTimeAttrChecker = sub ($) {
  my $type = shift;
  my $parse_method = 'parse_' . $type;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## Value stored instead of a false parse result.
    my $fallback;

    require Message::Date;
    my $parser = Message::Date->new;
    $parser->{level} = $self->{level};
    $parser->{onerror} = sub {
      my %opt = @_;
      unless ($opt{type} eq 'date value not supported') {
        $self->{onerror}->(%opt, node => $attr);
        $fallback = '';
      }
    };

    my $parsed = $parser->$parse_method ($attr->value);
    $element_state->{date_value}->{$attr->name} = $parsed || $fallback;
  };
}; # $GetDateTimeAttrChecker
581 wakaba 1.1
## Integer: an optional "-" followed by one or more ASCII digits.
##
## Checker taking ($self, $attr).  Any other value is reported as
## |integer:syntax error| at the "must" level.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A-?[0-9]+\z/;

  $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                     level => $self->{level}->{must});
}; # $HTMLIntegerAttrChecker
590    
## Non-negative integer: one or more ASCII digits.
##
## Returns a checker taking ($self, $attr).  $range_check is a code
## reference that receives the numeric value and returns true if it is
## in range.  The checker reports |nninteger:syntax error| for a
## malformed value and |nninteger:out of range| for a well-formed
## value rejected by $range_check, both at the "must" level.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;

    if ($value !~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'nninteger:syntax error',
                         level => $self->{level}->{must});
    } elsif (not $range_check->($value + 0)) {
      $self->{onerror}->(node => $attr, type => 'nninteger:out of range',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
608    
## Floating point number: an optional "-", then either digits with an
## optional fraction ("1", "1.", "1.5") or a fraction only (".5").
##
## Returns a checker taking ($self, $attr, $item, $element_state).
## $range_check is a code reference that receives the numeric value
## and returns true if it is in range.  An accepted number is stored
## in |$element_state->{number_value}->{attribute name}|.  The checker
## reports |float:syntax error| for a malformed value and |float:out
## of range| for a rejected number, both at the "must" level.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;

    my $is_number = ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                     $value =~ /\A-?\.[0-9]+\z/);
    unless ($is_number) {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error',
                         level => $self->{level}->{must});
      return;
    }

    ## TODO: parse algorithm
    my $number = $value + 0;
    if ($range_check->($number)) {
      $element_state->{number_value}->{$attr->name} = $number;
    } else {
      $self->{onerror}->(node => $attr, type => 'float:out of range',
                         level => $self->{level}->{must});
    }
  };

  ## TODO: scientific notation
}; # $GetHTMLFloatingPointNumberAttrChecker
632    
## |step| attribute: a valid floating point number greater than zero,
## or an ASCII case-insensitive match for "any".
##
## Checker taking ($self, $attr).  It reports |float:out of range| for
## a well-formed number that is not greater than zero and |float:syntax
## error| for any other unrecognized value, both at the "must" level.
my $StepAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;

  return if $value =~ /\A[Aa][Nn][Yy]\z/;

  my $is_number = ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                   $value =~ /\A-?\.[0-9]+\z/);
  my $type = !$is_number ? 'float:syntax error'
           : $value > 0  ? undef
           :               'float:out of range';
  return unless defined $type;

  $self->{onerror}->(node => $attr,
                     type => $type,
                     level => $self->{level}->{must});

  ## TODO: scientific
}; # $StepAttrChecker
655    
## HTML4 %Length;: one or more ASCII digits, optionally followed by
## "%".
##
## Checker taking ($self, $attr).  Any other value is reported as
## |length:syntax error| at the "must" level.
##
## NOTE: The HTML4 definition is too vague - it does not define the
## syntax of percentage values at all (!).
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A[0-9]+%?\z/;

  $self->{onerror}->(node => $attr, type => 'length:syntax error',
                     level => $self->{level}->{must});
}; # $HTMLLengthAttrChecker
668    
## Unanchored building blocks for Internet media type syntax.

## One or more printable ASCII characters other than space and
## |"(),/:;<=>?@[\]| (presumably the RFC 2045 |token| - confirm).
my $MIMEToken = qr{[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+};

## A type or subtype name of 1 to 127 characters (RFC 4288).
my $TypeOrSubtype = qr{[A-Za-z0-9!#\$&.+^_-]{1,127}};

## |type/subtype| without parameters; captures the type and the
## subtype.
my $IMTNoParameter = qr{($TypeOrSubtype)/($TypeOrSubtype)};
672    
## "A valid MIME type, optionally with parameters. [RFC 2046]"
## ISSUE: RFC 2046 does not define the syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
##
## Checker taking ($self, $attr).  A value that does not have the form
## |type/subtype| followed by zero or more |;name=value| parameters is
## reported as |IMT:syntax error| at the "must" level.  Otherwise the
## type, the subtype and the parameter name/value pairs are handed to
## |Whatpm::IMTChecker|, whose errors are forwarded to
## |$self->{onerror}| with the attribute as |node|.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: The RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;

  unless ($value =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#) {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                       level => $self->{level}->{must});
    return;
  }

  ## Type and subtype first, then name/value pairs.
  my @type = ($1, $2);
  my $param = $3;
  while ($param =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
    my ($name, $token, $quoted) = ($1, $2, $3);
    if (defined $token) {
      push @type, $name => $token;
    } else {
      ## Resolve the backslash escapes, then drop the enclosing
      ## quotation marks.
      $quoted =~ s/\\(.)/$1/gs;
      push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
    }
  }

  require Whatpm::IMTChecker;
  my $ic = Whatpm::IMTChecker->new;
  $ic->{level} = $self->{level};
  $ic->check_imt (sub {
    $self->{onerror}->(@_, node => $attr);
  }, @type);
}; # $HTMLIMTAttrChecker
709    
## Language tag.
##
## Checker taking ($self, $attr).  The value is checked by
## |Whatpm::LangTag->check_rfc3066_language_tag|; any error is
## forwarded to |$self->{onerror}| with the attribute as |node|.
##
## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
my $HTMLLanguageTagAttrChecker = sub {
  my ($self, $attr) = @_;

  my $forward_error = sub {
    $self->{onerror}->(@_, node => $attr);
  };

  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag
      ($attr->value, $forward_error, $self->{level});
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
723    
## "A valid media query [MQ]"
##
## Checker taking ($self, $attr).  The value is not examined; every
## attribute is reported as |media query| at the "uncertain" level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my $level = $self->{level}->{uncertain};
  $self->{onerror}->(node => $attr,
                     type => 'media query',
                     level => $level);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
732    
## Event handler content attribute.
##
## Checker taking ($self, $attr).  The value is not examined; every
## attribute is reported as |event handler| at the "uncertain" level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my $level = $self->{level}->{uncertain};
  $self->{onerror}->(node => $attr,
                     type => 'event handler',
                     level => $level);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3?  E4X?  ES4?  JS1.x?
  ## ISSUE: Does automatic semicolon insertion not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
744    
## |form| attribute.
##
## Checker taking ($self, $attr).  It appends
## |['form', value, attribute]| to |$self->{idref}|; nothing is
## reported here.
##
## NOTE: The value MUST be the ID of a |form| element.
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;

  my $idref = ['form', $attr->value => $attr];
  push @{$self->{idref}}, $idref;

  ## ISSUE: <form id=""><input form=""> (empty ID)?
}; # $HTMLFormAttrChecker
755    
## |list| attribute.
##
## Checker taking ($self, $attr).  It appends
## |['datalist', value, attribute]| to |$self->{idref}|; nothing is
## reported here.
##
## NOTE: The value MUST be the ID of a |datalist| element.
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;

  my $idref = ['datalist', $attr->value, $attr];
  push @{$self->{idref}}, $idref;

  ## TODO: Warn about violations of control-dependent restrictions.
  ## For example, |<input type=url maxlength=10 list=a> <datalist
  ## id=a><option value=nonurlandtoolong></datalist>| should be
  ## warned.
}; # $ListAttrChecker
768    
## Attribute checker for the |pattern| attribute.
##
## The value is not parsed here.  It is handed to |$self->{onsubdoc}|
## as a character-string subdocument of type |text/x-regexp-js|.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;

  my $subdoc = {
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-regexp-js',
    is_char_string => 1,
  };
  $self->{onsubdoc}->($subdoc);

  ## ISSUE: "value must match the Pattern production of ECMA 262's
  ## grammar" - no additional constraints (e.g. {n,m} then n>=m).

  ## TODO: Warn if @value does not match @pattern.
}; # $PatternAttrChecker
781    
## Attribute checker for the |accept| attribute.
##
## The value is a comma-separated list of tokens, compared ASCII
## case-insensitively.  Each token must be |audio/*|, |video/*|,
## |image/*|, or an Internet media type without parameters.
##
## Reports through |$self->{onerror}|:
##   - |duplicate token| (must) for a token that occurred before;
##   - |IMTnp:syntax error| (must) for a token that is neither a
##     wildcard nor matched by |$IMTNoParameter|;
##   - whatever |Whatpm::IMTChecker->check_imt| reports for a
##     syntactically valid media type.
my $AcceptAttrChecker = sub {
  my ($self, $attr) = @_;

  my $value = $attr->value;
  $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive

  ## An empty attribute value is treated as one empty token.  The
  ## limit -1 keeps trailing empty tokens (|a/b,|).
  my @value = length $value ? split /,/, $value, -1 : ('');
  my %has_value;
  for my $v (@value) {
    if ($has_value{$v}) {
      $self->{onerror}->(node => $attr,
                         type => 'duplicate token',
                         value => $v,
                         level => $self->{level}->{must});
      next;
    }
    $has_value{$v} = 1;

    if ($v eq 'audio/*' or $v eq 'video/*' or $v eq 'image/*') {
      #
    } elsif ($v =~ m[\A$IMTNoParameter\z]) {
      ## Copy the captures immediately.  Arguments are aliased in
      ## |@_|, so passing |$1| and |$2| directly would let any regexp
      ## match inside |check_imt| change what the callee sees.
      ## NOTE(review): assumes |$IMTNoParameter| (defined elsewhere in
      ## this file) captures the type as $1 and the subtype as $2.
      my ($type, $subtype) = ($1, $2);

      ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
      ## define its own syntax citing RFC 4288.

      ## NOTE: Parameters not allowed.
      require Whatpm::IMTChecker;
      my $ic = Whatpm::IMTChecker->new;
      $ic->{level} = $self->{level};
      $ic->check_imt (sub {
        $self->{onerror}->(@_, node => $attr);
      }, $type, $subtype);
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'IMTnp:syntax error', ## TODOC: type
                         value => $v,
                         level => $self->{level}->{must});
    }
  }
}; # $AcceptAttrChecker
820    
## Attribute checker for the |name| attribute of form controls.
##
## An empty value is reported as |empty control name| at the |must|
## level.  Any non-empty value is accepted.
my $FormControlNameAttrChecker = sub {
  my ($self, $attr) = @_;

  my $has_name = length $attr->value;
  if (not $has_name) {
    my @report = (node => $attr,
                  type => 'empty control name', ## TODOC: type
                  level => $self->{level}->{must});
    $self->{onerror}->(@report);
  }

  ## NOTE: No uniqueness constraint.
}; # $FormControlNameAttrChecker
832    
## Attribute checker for the |autofocus| attribute.
##
## The value is first checked as a boolean attribute.  The second and
## later |autofocus| attributes seen by the same checker object are
## reported as |duplicate autofocus| at the |must| level;
## |$self->{has_autofocus}| records that one has been seen.
my $AutofocusAttrChecker = sub {
  my ($self, $attr) = @_;

  my $check_boolean = $GetHTMLBooleanAttrChecker->('autofocus');
  $check_boolean->(@_);

  my $seen_before = $self->{has_autofocus};
  $self->{onerror}->(node => $attr,
                     type => 'duplicate autofocus', ## TODOC: type
                     level => $self->{level}->{must})
      if $seen_before;
  $self->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
845    
## Attribute checker for the |usemap| attribute.
##
## The value MUST be a valid hash-name reference to a |map| element.
## A value starting with "#" is queued in |$self->{usemap}| as
## |[$name, $attr]|, where |$name| is the value without the leading
## "#".  Any other value is reported as |hashref:syntax error| at the
## |must| level.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  my $reference = $attr->value;

  unless (substr ($reference, 0, 1) eq '#') {
    $self->{onerror}->(node => $attr, type => 'hashref:syntax error',
                       level => $self->{level}->{must});
    return;
  }

  ## NOTE: |usemap="#"| is conforming, though it identifies no |map|
  ## element according to the "rules for parsing a hash-name
  ## reference" algorithm.  The document is non-conforming anyway,
  ## since |<map name="">| (empty name) is non-conforming.
  my $map_name = substr ($reference, 1);
  push @{$self->{usemap}}, [$map_name, $attr];

  ## NOTE: Space characters in hash-name references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported
}; # $HTMLUsemapAttrChecker
863    
## Valid browsing context name
##
## Reports through |$self->{onerror}| at the |must| level:
##   - |window name:empty| when the value is the empty string;
##   - |window name:reserved| (with the value) when it starts with "_".
## Any other value is accepted.
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($self, $attr) = @_;
  my $name = $attr->value;

  if (not length $name) {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  } elsif (substr ($name, 0, 1) eq '_') {
    $self->{onerror}->(node => $attr, type => 'window name:reserved',
                       level => $self->{level}->{must},
                       value => $name);
  }
}; # $HTMLBrowsingContextNameAttrChecker
879    
## Valid browsing context name or keyword
##
## Reports through |$self->{onerror}| at the |must| level:
##   - |window name:empty| when the value is the empty string;
##   - |window name:reserved| (with the case-folded value) when the
##     value starts with "_" but is not an ASCII case-insensitive
##     match for |_blank|, |_self|, |_parent|, or |_top|.
## Any other value is accepted.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    ## The keywords are matched ASCII case-insensitively, so only A-Z
    ## are folded.  |lc| would also fold non-ASCII characters (for
    ## example U+212A KELVIN SIGN to "k") and let non-keywords
    ## through.
    $value =~ tr/A-Z/a-z/;
    unless ({
      _blank => 1, _self => 1, _parent => 1, _top => 1,
    }->{$value}) {
      $self->{onerror}->(node => $attr,
                         type => 'window name:reserved',
                         level => $self->{level}->{must},
                         value => $value);
    }
  } elsif (length $value) {
    #
  } else {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  }
}; # $HTMLTargetAttrChecker
901    
## Attribute checker for attributes whose value is a group of
## selectors.
##
## The value is parsed by |Whatpm::CSS::SelectorsParser|, configured
## with the pseudo-classes and pseudo-elements listed below and with
## the checker's error levels.  Parser errors are forwarded to
## |$self->{onerror}| with |node => $attr| appended.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  my $selectors = $attr->value;

  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  my @pseudo_elements = qw/
    after before first-letter first-line
  /;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;
  for my $name (@pseudo_classes) {
    $parser->{pseudo_class}->{$name} = 1;
  }
  for my $name (@pseudo_elements) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{level} = $self->{level};
  $parser->{onerror} = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
929    
## Checks a character encoding name.
##
## Arguments: the charset name, the checker object, the attribute node
## used as the error location, and an optional flag that requires the
## encoding to be ASCII-compatible.
##
## Returns |($charset, $charset_value)|: the entry found in
## |$Message::Charset::Info::IANACharset| (if any) and the name after
## ASCII case folding.
##
## Reports through |$self->{onerror}|, always with the folded name as
## |value|:
##   - |charset:syntax error| (must);
##   - |charset:not preferred| (must);
##   - |charset:private| or |charset:not registered| (good);
##   - |charset:not ascii compat| (must), only when the flag is set
##     and the encoding is known.
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($charset_value, $self, $attr, $ascii_compat) = @_;

  ## NOTE: This code is used for |charset=""| attributes, |charset=|
  ## portion of the |content=""| attributes, and |accept-charset=""|
  ## attributes.

  ## NOTE: Though the case-sensitivity of |charset| attribute value
  ## is not explicitly spelled in the HTML5 spec, the Character Set
  ## registry of IANA, which is referenced from HTML5 spec, says that
  ## charset name is case-insensitive.
  $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

  require Message::Charset::Info;
  my $charset = $Message::Charset::Info::IANACharset->{$charset_value};

  ## All errors share the same shape; only type and level differ.
  my $report = sub {
    my ($type, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $charset_value,
                       level => $self->{level}->{$level_name});
  };

  ## Used for names that are not registered: |x-| names are private.
  my $unregistered_type = sub {
    return $charset_value =~ /^x-/
        ? 'charset:private' : 'charset:not registered';
  };

  ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
  ## Syntactically valid and registered?  What about x-charset names?
  $report->('charset:syntax error', 'must')
      unless Message::Charset::Info::is_syntactically_valid_iana_charset_name
          ($charset_value);

  if ($charset) {
    ## ISSUE: What is "the preferred name for that encoding" (for a charset
    ## with no "preferred MIME name" label)?
    my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
    my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
    my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();

    $report->('charset:not preferred', 'must')
        unless ($charset_status & $preferred) == $preferred;

    $report->($unregistered_type->(), 'good')
        unless ($charset_status & $registered) == $registered;

    if ($ascii_compat and
        not ($charset->{category} &
             Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ())) {
      $report->('charset:not ascii compat', 'must');
    }

    ## TODO: non-preferred-name error for following cases:
  } else {
    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    $report->($unregistered_type->(), 'good');
  }

  return ($charset, $charset_value);
}; # $HTMLCharsetChecker
1018    
## NOTE: "An ordered set of space-separated tokens" where "each token
## MUST be the preferred name of an ASCII-compatible character
## encoding".
##
## Splits the attribute value on space characters and checks every
## non-empty token with |$HTMLCharsetChecker|, requiring an
## ASCII-compatible encoding.
my $HTMLCharsetsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: "ordered set of space-separated tokens" is not defined.

  ## XXX
  ## ISSUE: Uniqueness is not enforced.

  for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    ## Leading space characters produce an empty first field.
    next unless length $token;
    $HTMLCharsetChecker->($token, $self, $attr, 1);
  }

  ## ISSUE: Shift_JIS is ASCII-compatible?  What about ISO-2022-JP?
}; # $HTMLCharsetsAttrChecker
1038    
## Attribute checker for HTML4 color values (|%Color;|).
##
## Accepts "#" followed by one or more hexadecimal digits, or one of
## the sixteen HTML4 color names, case-insensitively.  Anything else
## is reported as |color:syntax error| at the |html4_fact| level.
my $HTMLColorAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: HTML4 "color" or |%Color;|

  my $color = $attr->value;
  my $is_color = $color =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i;

  unless ($is_color) {
    $self->{onerror}->(node => $attr, type => 'color:syntax error',
                       level => $self->{level}->{html4_fact});
  }

  ## TODO: HTML4 has some guideline on usage of color.
}; # $HTMLColorAttrChecker
1053    
## Attribute checker shared by the |ref| and |template| attributes.
##
## Steps:
##   1. The value is checked with |$HTMLURIAttrChecker|.
##   2. A |ref| attribute whose owner element has no |template|
##      attribute is reported as |attribute not allowed| (must).
##   3. If the value, resolved against the document URI, points into
##      the same document:
##        - with a fragment identifier, |[$fragid, $attr]| is queued
##          in |$self->{ref}| or |$self->{template}|;
##        - without one, a |template| attribute is reported as
##          |template:not template| (must) unless the document element
##          is an HTML |datatemplate| element.
##      References to other documents are not checked.
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($self, $attr) = @_;
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $attr->name;

  if ($attr_name eq 'ref' and
      not $attr->owner_element->has_attribute_ns (undef, 'template')) {
    $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                       level => $self->{level}->{must});
  }

  require Message::URL;
  my $doc = $attr->owner_document;
  my $doc_uri = $doc->document_uri;
  my $uri = Message::URL->new_abs ($attr->value, $doc_uri);
  my $no_frag_uri = $uri->clone;
  $no_frag_uri->uri_fragment (undef);

  my $is_same_document = defined $doc_uri
      ? $doc_uri eq $no_frag_uri
      : $no_frag_uri eq '';
  unless ($is_same_document) {
    ## TODO: An external document is referenced.
    ## The document MUST be an HTML or XML document.
    ## If there is a fragment identifier, it MUST point a part of the doc.
    ## If the attribute is |template|, the pointed part MUST be a
    ## |datatemplate| element.
    ## If no fragment identifier is specified, the root element MUST be
    ## a |datatemplate| element when the attribute is |template|.
    return;
  }

  my $fragid = $uri->uri_fragment;
  if (defined $fragid) {
    push @{$self->{$attr_name}}, [$fragid, $attr];
    return;
  }

  ## No fragment identifier: the document itself is referenced.  Only
  ## |template| has a requirement on the document element.
  return unless $attr_name eq 'template';

  my $docel = $doc->document_element;
  if ($docel) {
    my $nsuri = $docel->namespace_uri;
    return if defined $nsuri and $nsuri eq $HTML_NS and
        $docel->manakai_local_name eq 'datatemplate';
  }

  $self->{onerror}->(node => $attr, type => 'template:not template',
                     level => $self->{level}->{must});
}; # $HTMLRefOrTemplateAttrChecker
1106    
## Attribute checker for |repeat-min|, |repeat-max|, and
## |repeat-start|.
##
## When the attribute is in a namespace and its owner element is in
## the HTML namespace, it is reported as |attribute not allowed|
## (must).  The value is then checked as a non-negative integer.
my $HTMLRepeatIndexAttrChecker = sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## Both conditions are required.  With |or| the test was true for
    ## every namespaced owner element and compared |undef| for owner
    ## elements in no namespace.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1121    
## Attribute checker for the |placeholder| attribute.
##
## A value containing U+000A or U+000D is reported once as |newline in
## value| at the |must| level.
my $PlaceholderAttrChecker = sub {
  my ($self, $attr) = @_;
  my $text = $attr->value;
  my $newlines = ($text =~ tr/\x0A\x0D//);
  if ($newlines) {
    $self->{onerror}->(node => $attr,
                       type => 'newline in value', ## TODOC: type
                       level => $self->{level}->{must});
  }
}; # $PlaceholderAttrChecker
1130    
## Checkers for the HTML global attributes, keyed by attribute local
## name.  Each value is a code reference invoked as
## |$checker->($self, $attr, $item, $element_state)|.  Errors are
## reported through |$self->{onerror}|.
my $HTMLAttrChecker = {
  ## |accesskey|: an ordered set of unique space-separated tokens,
  ## each of which must be exactly one character long.
  accesskey => sub {
    my ($self, $attr) = @_;

    ## "Ordered set of unique space-separated tokens"

    my %keys;
    my @keys = grep {length} split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

    for my $key (@keys) {
      unless ($keys{$key}) {
        $keys{$key} = 1;
        if (length $key != 1) {
          $self->{onerror}->(node => $attr, type => 'char:syntax error',
                             value => $key,
                             level => $self->{level}->{must});
        }
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $key,
                           level => $self->{level}->{must});
      }
    }
  }, # accesskey

  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]

  ## |id|: must be non-empty, unique, and free of space characters.
  ## Every attribute node is recorded in |$self->{id}->{$value}|; the
  ## first occurrence also records |$element_state->{id_type}| in
  ## |$self->{id_type}|.
  id => sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    if (length $value > 0) {
      if ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID',
                           level => $self->{level}->{must});
        push @{$self->{id}->{$value}}, $attr;
      } else {
        $self->{id}->{$value} = [$attr];
        $self->{id_type}->{$value} = $element_state->{id_type} || '';
      }
      if ($value =~ /[\x09\x0A\x0C\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID',
                           level => $self->{level}->{must});
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value',
                         level => $self->{level}->{must});
    }
  },
  title => sub {}, ## NOTE: No conformance criteria

  ## |lang|: the empty string is allowed; any other value is checked
  ## as an RFC 3066 language tag.
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value eq '') {
      #
    } else {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        $self->{onerror}->(@_, node => $attr);
      }, $self->{level});
    }
    ## ISSUE: RFC 4646 (3066bis)?

    ## TODO: test data

    ## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
    ## non-conforming.  Such errors are detected by the checkers of
    ## |{}xml:lang| and |{xml}:lang| attributes.
  },
  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),

  ## |class|: tokens must be unique.  Each distinct token is recorded
  ## in |$self->{return}->{class}|.
  class => sub {
    my ($self, $attr) = @_;

    ## NOTE: "Unordered set of unique space-separated tokens".

    my %word;
    for my $word (grep {length $_}
                  split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      unless ($word{$word}) {
        $word{$word} = 1;
        push @{$self->{return}->{class}->{$word}||=[]}, $attr;
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $word,
                           level => $self->{level}->{must});
      }
    }
  },
  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),

  ## |contextmenu|: queued in |$self->{idref}| as a reference to a
  ## |menu| element.
  contextmenu => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    push @{$self->{idref}}, ['menu', $value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },
  hidden => $GetHTMLBooleanAttrChecker->('hidden'),
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ref => $HTMLRefOrTemplateAttrChecker,

  ## |registrationmark|: any value is conforming, but the attribute
  ## is only allowed while |$self->{flag}->{in_rule}| is set, and not
  ## on an HTML |nest| element or on an HTML |rule| element for which
  ## |$element_state->{in_rule_original}| is false.
  registrationmark => sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## NOTE: Any value is conforming.

    if ($self->{flag}->{in_rule}) {
      my $el = $attr->owner_element;
      my $ln = $el->manakai_local_name;
      if ($ln eq 'nest' or
          ($ln eq 'rule' and not $element_state->{in_rule_original})) {
        my $nsuri = $el->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    } else {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## |repeat|: the value must be |template| or an integer.  A
  ## namespaced |repeat| attribute is not allowed on an element in the
  ## HTML namespace.
  repeat => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## Both conditions are required.  With |or| the test was true
      ## for every namespaced owner element and compared |undef| for
      ## owner elements in no namespace.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    my $value = $attr->value;
    if ($value eq 'template') {
      #
    } elsif ($value =~ /\A-?[0-9]+\z/) {
      #
    } else {
      $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "Repetition templates may occur anywhere."  Does that mean
    ## that the attribute MAY be specified to any element, or that the
    ## element with that attribute (i.e. a repetition template) can be
    ## inserted anywhere in a document tree?
  },
  'repeat-min' => $HTMLRepeatIndexAttrChecker,
  'repeat-max' => $HTMLRepeatIndexAttrChecker,
  'repeat-start' => $HTMLRepeatIndexAttrChecker,

  ## |repeat-template|: the value is not checked.  A namespaced
  ## |repeat-template| attribute is not allowed on an element in the
  ## HTML namespace.
  'repeat-template' => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## Both conditions are required; see |repeat|.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## ISSUE: This attribute has no conformance requirement.
    ## ISSUE: Repetition blocks MAY have this attribute.  Then, is the
    ## attribute allowed on an element that is not a repetition block?
  },
  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
  spellcheck => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),

  ## |style|: the value is handed to |$self->{onsubdoc}| as a
  ## |text/x-css-inline| character-string subdocument.
  style => sub {
    my ($self, $attr) = @_;

    $self->{onsubdoc}->({s => $attr->value,
                         container_node => $attr,
                         media_type => 'text/x-css-inline',
                         is_char_string => 1});

    ## NOTE: "... MUST still be comprehensible and usable if those
    ## attributes were removed" is a semantic requirement, it cannot
    ## be tested.
  },
  tabindex => $HTMLIntegerAttrChecker,
  template => $HTMLRefOrTemplateAttrChecker,

  ## |xml:lang| in no namespace: always reported (as information in
  ## HTML documents, at the |html5_no_may| level otherwise).  It must
  ## be accompanied by a |lang| attribute with the same value,
  ## compared ASCII case-insensitively.
  'xml:lang' => sub {
    my ($self, $attr) = @_;

    if ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(type => 'in HTML:xml:lang',
                         level => $self->{level}->{info},
                         node => $attr);
      ## NOTE: This is not an error, but the attribute will be ignored.
    } else {
      $self->{onerror}->(type => 'in XML:xml:lang',
                         level => $self->{level}->{html5_no_may},
                         node => $attr);
      ## TODO: We need to add test for this error.
    }

    my $lang_attr = $attr->owner_element->get_attribute_node_ns
        (undef, 'lang');
    if ($lang_attr) {
      my $lang_attr_value = $lang_attr->value;
      $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      if ($lang_attr_value ne $value) {
        $self->{onerror}->(type => 'xml:lang ne lang',
                           level => $self->{level}->{must},
                           node => $attr);
      }
    } else {
      $self->{onerror}->(type => 'xml:lang not allowed',
                         level => $self->{level}->{must},
                         node => $attr);
      ## TODO: We need to add test for <x {xml}:lang {}xml:lang>.
    }
  },

  ## |xmlns| in no namespace: the value must be the HTML namespace URI
  ## and the attribute is only allowed in HTML documents.  The value
  ## is recorded in |$self->{return}->{uri}| as a namespace URI.
  xmlns => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                         level => $self->{level}->{must});
      ## TODO: Should be new "bad namespace" error?
    }
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                         level => $self->{level}->{must});
      ## TODO: Test
    }

    ## TODO: Should be resolved?
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {namespace => 1}};
  },
};
1371    
1372 wakaba 1.79 ## ISSUE: Shouldn't the same-origin policy be applied to the datatemplate feature?
1373    
## Feature status of the HTML global attributes, keyed by attribute
## local name.  Entries are grouped by status.
my %HTMLAttrStatus = (
  (map { ($_ => FEATURE_HTML5_REC) }
       qw/contenteditable dir id lang style title/),
  (map { ($_ => FEATURE_HTML5_LC) }
       qw/class draggable hidden/),
  (map { ($_ => FEATURE_HTML5_WD) }
       qw/contextmenu spellcheck xmlns/),
  (map { ($_ => FEATURE_HTML5_AT_RISK) }
       qw/ref registrationmark template/),
  (map { ($_ => FEATURE_WF2) }
       qw/repeat repeat-max repeat-min repeat-start repeat-template/),

  accesskey => FEATURE_HTML5_FD,
  irrelevant => FEATURE_HTML5_DROPPED,
  role => 0,
  tabindex => FEATURE_HTML5_DEFAULT,
);
1400    
## Feature status of the common attributes of elements defined by
## HTML5 and by XHTML Modularization, keyed by attribute local name.
my %HTMLM12NCommonAttrStatus = (
  ## RDFa
  (map { ($_ => FEATURE_RDFA_REC) }
       qw/about content datatype href property rel resource rev typeof/),
  instanceof => FEATURE_RDFA_LC_DROPPED,

  ## Core, I18N, and style
  class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  (map { ($_ => FEATURE_HTML5_REC) } qw/dir id style title/),
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_M12N10_REC,

  ## Intrinsic events
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
       qw/onclick ondblclick onmousedown onmouseup onmouseover
          onmousemove onmouseout onkeypress onkeydown onkeyup/),
);
1430    
## Feature status of the XHTML2 common attributes, keyed by attribute
## local name and grouped by XHTML2 attribute collection.
my %XHTML2CommonAttrStatus = (
  ## Core
  class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  id => FEATURE_HTML5_REC,
  #xml:id
  layout => FEATURE_XHTML2_ED,
  title => FEATURE_HTML5_REC,

  ## Hypertext
  (map { ($_ => FEATURE_XHTML2_ED) }
       qw/cite href hreflang hrefmedia hreftype nextfocus prevfocus
          target/),
  #xml:base

  ## I18N
  #xml:lang

  ## Bi-directional
  dir => FEATURE_HTML5_REC,

  ## Edit
  (map { ($_ => FEATURE_XHTML2_ED) } qw/edit datetime/),

  ## Embedding
  (map { ($_ => FEATURE_XHTML2_ED) } qw/encoding src srctype/),

  ## Image Map
  (map { ($_ => FEATURE_XHTML2_ED) } qw/usemap ismap shape coords/),

  ## Media
  media => FEATURE_XHTML2_ED,

  ## Metadata
  (map { ($_ => FEATURE_XHTML2_ED) }
       qw/about content datatype instanceof property rel resource rev/),

  ## Role
  role => FEATURE_XHTML2_ED,

  ## Style
  style => FEATURE_HTML5_REC,
);
1490    
## Feature status of the common attributes of elements defined by
## HTML5, XHTML Modularization, and XHTML2.
##
## Starts from |%HTMLM12NCommonAttrStatus|, then
## |%XHTML2CommonAttrStatus|.  The entries listed afterwards override
## both, so their position at the end of the list is significant.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  ## RDFa attributes that are also part of XHTML2
  (map { ($_ => FEATURE_RDFA_REC | FEATURE_XHTML2_ED) }
       qw/about content datatype property rel resource rev/),
  instanceof => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,

  ## RDFa attributes whose status does not include XHTML2 here
  (map { ($_ => FEATURE_RDFA_REC) } qw/href typeof/),

  class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  (map { ($_ => FEATURE_HTML5_REC) } qw/dir id style title/),
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
);
1513    
## Event handler content attributes that are global attributes.  They
## share |$HTMLEventHandlerAttrChecker| and have the default HTML5
## status.
for my $event_attr (qw/
  onabort onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onscroll onselect
  onsubmit
/) {
  $HTMLAttrChecker->{$event_attr} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$event_attr} = FEATURE_HTML5_DEFAULT;
}

## Event handler content attributes that are checked in the same way
## but are marked as dropped from HTML5.
for my $event_attr (qw/
  onbeforeunload onhashchange onresize onstorage onunload
  ondataunavailable
  onmessage
/) {
  $HTMLAttrChecker->{$event_attr} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$event_attr} = FEATURE_HTML5_DROPPED;
}
1534    
## NOTE: Non-standard global attributes in the HTML namespace.
##
## Registers checkers and feature statuses for attributes that are in
## the HTML namespace themselves.  The entry for the empty local name
## is the fallback for attributes not listed here.
$AttrChecker->{$HTML_NS}->{''} = sub {}; # no syntactical checks
$AttrStatus->{$HTML_NS}->{''} = 0; # disallowed and not part of any standard

$AttrStatus->{$HTML_NS}->{active} = FEATURE_HTML5_DROPPED;

## Web Forms 2.0 repetition attributes reuse the global checkers.
for my $name (qw/repeat repeat-max repeat-min repeat-start repeat-template/) {
  $AttrChecker->{$HTML_NS}->{$name} = $HTMLAttrChecker->{$name};
  $AttrStatus->{$HTML_NS}->{$name} = FEATURE_WF2;
}

## RDFa
$AttrStatus->{$HTML_NS}->{$_} = FEATURE_RDFA_REC | FEATURE_XHTML2_ED
    for qw/about content datatype property rel resource rev/;
$AttrStatus->{$HTML_NS}->{instanceof}
    = FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED;
$AttrStatus->{$HTML_NS}->{typeof} = FEATURE_RDFA_REC;

## Role
$AttrStatus->{$HTML_NS}->{role} = FEATURE_ROLE_LC;

## XHTML2 only
$AttrStatus->{$HTML_NS}->{$_} = FEATURE_XHTML2_ED
    for qw/cite coords datetime edit encoding href hreflang hrefmedia
           hreftype ismap layout media nextfocus prevfocus shape src
           srctype style target usemap/;

## XHTML Modularization 1.1 and XHTML2
$AttrStatus->{$HTML_NS}->{$_} = FEATURE_M12N11_LC | FEATURE_XHTML2_ED
    for qw/class dir id title/;

## XHTML Modularization 1.1 intrinsic events
$AttrStatus->{$HTML_NS}->{$_} = FEATURE_M12N11_LC
    for qw/onclick ondblclick onmousedown onmouseup onmouseover
           onmousemove onmouseout onkeypress onkeydown onkeyup/;
1563    
## Checker for |data-*| attributes.  It performs no check at all.
##
## NOTE: "Authors should ... when the attributes are ignored and
## any associated CSS dropped, the page is still usable." (semantic
## constraint.)
my $HTMLDatasetAttrChecker = sub {
  return;
}; # $HTMLDatasetAttrChecker

## Feature status reported for every |data-*| attribute.
my $HTMLDatasetAttrStatus = FEATURE_HTML5_LC;
1571 wakaba 1.73
## Builds a |check_attrs| handler for an HTML element.
##
## Arguments:
##   1. A hash reference mapping local names of null-namespace
##      attributes to element-specific checker code references.
##   2. A hash reference mapping local names of null-namespace
##      attributes to feature status values.
##
## Returns a code reference taking |($self, $item, $element_state)|.
## For each attribute of |$item->{node}| it selects a checker and a
## status, runs the checker (or reports 'unknown attribute'), and then
## hands the status to |$self->_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($checker_for, $status_for) = @_;
  return sub {
    my ($self, $item, $element_state) = @_;
    foreach my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' if not defined $ns;
      my $ln = $attr->manakai_local_name;

      my ($checker, $status);
      if ($ns eq '') {
        ## |data-*| attributes (without uppercase ASCII letters) share
        ## one checker and one status; any other null-namespace attribute
        ## is looked up in the element-specific tables first.
        my $is_dataset_attr = ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                               $ln !~ /[A-Z]/);
        ($checker, $status) = $is_dataset_attr
            ? ($HTMLDatasetAttrChecker, $HTMLDatasetAttrStatus)
            : ($checker_for->{$ln} || $HTMLAttrChecker->{$ln},
               $status_for->{$ln});
      }

      ## Fall back to the per-namespace tables, then to the namespace's
      ## default (empty local name) entry.
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      if (not defined $status and length $ns) {
        $status = FEATURE_ALLOWED;
      }

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($ns ne '' or $status_for->{$ln}) {
        ## A null-namespace attribute with neither a checker nor an
        ## element-specific status is deliberately not reported here.
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1613    
## Base checker shared by the HTML element checkers below.  It extends
## |%Whatpm::ContentChecker::AnyChecker| with a |check_start| that marks
## the |template| and |ref| URI attributes as referring to resources, and
## with a |check_attrs| that only knows the attributes listed in
## |%HTMLAttrStatus|.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    \%HTMLAttrStatus,
  ),
);
1624    
## Checker for elements whose content model is empty: any child element
## that is not listed in |$self->{plus_elements}| and any significant
## text are reported as errors.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## Decide which error, if any, applies; report it once at the end.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:empty';
    }
    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{level}->{must})
        if $has_significant;
  },
);
1652    
## Checker for elements whose content model is text only: child elements
## are errors unless they are listed in |$self->{plus_elements}|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## Decide which error, if any, applies; report it once at the end.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:text';
    }
    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
);
1671    
## Checker for elements whose content model is flow content.
##
## |$element_state->{has_non_style}| records that something other than a
## |style| element has been seen; a |style| child is only accepted
## before that point and only with a |scoped| attribute.
my %HTMLFlowContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## Decide which error, if any, applies; report it once at the end.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      $error_type = 'element not allowed:flow style'
          if $element_state->{has_non_style} or
             not $child_el->has_attribute_ns (undef, 'scoped');
    } elsif ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
    } else {
      $element_state->{has_non_style} = 1;
      $error_type = 'element not allowed:flow';
    }
    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: A modified copy of the code below is in |datagrid| checker.
    if ($element_state->{has_significant}) {
      ## Propagate the "has significant content" mark to the parent.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
);
1720    
## Checker for elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## Decide which error, if any, applies; report it once at the end.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif (not $HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      $error_type = 'element not allowed:phrasing';
    }
    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  ## The end-of-element check is shared with the flow content checker.
  check_end => $HTMLFlowContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between flow and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
1746    
## Transparent elements are checked with a copy of the flow content
## checker.
my %HTMLTransparentChecker = (%HTMLFlowContentChecker);
## ISSUE: Significant content rule should be applied to transparent element
## with parent?

our ($Element, $ElementDefault);

## Entry keyed by the empty local name in the HTML namespace; it uses the
## base HTML checker unchanged.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
1757    
## The |html| element.
##
## Its children are tracked with |$element_state->{phase}|, which moves
## from 'before head' to 'after head' (a |head| was seen) and then to
## 'after body' (a |body| was seen).  Any other child element and any
## significant text are errors.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    version => sub {
      ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
      ## Though DTDs of various versions of HTML define the attribute
      ## as |#FIXED|, this conformance checker does no check for
      ## the attribute value, since what kind of check should be done
      ## is unknown.
    },
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    manifest => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';

    $element_state->{uri_info}{manifest}{type}{resource} = 1;
    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## |@error| receives the |type| (and optional |text|) arguments of
    ## the error to report for this child, if any.
    my @error;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      @error = (type => 'element not allowed:minus');
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{phase} eq 'before head') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'head') {
        $element_state->{phase} = 'after head';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
        ## A |body| without a preceding |head|.
        @error = (type => 'ps element missing', text => 'head');
        $element_state->{phase} = 'after body';
      } else {
        @error = (type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after head') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
        $element_state->{phase} = 'after body';
      } else {
        @error = (type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after body') {
      @error = (type => 'element not allowed');
    } else {
      die "check_child_element: Bad |html| phase: $element_state->{phase}";
    }
    $self->{onerror}->(node => $child_el,
                       @error,
                       level => $self->{level}->{must})
        if @error;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Required children that are still missing, by final phase.
    my %missing_children_in = (
      'after body' => [],
      'before head' => ['head', 'body'],
      'after head' => ['body'],
    );
    my $missing_children = $missing_children_in{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";
    foreach my $missing_child (@$missing_children) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => $missing_child,
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1862 wakaba 1.25
## The |head| element.
##
## Children must be metadata content, with at most one |title| and no
## |style| carrying a |scoped| attribute; exactly one |title| is
## required.  While the children are being checked,
## |$self->{flag}->{in_head}| is set to true; the value it had before is
## kept in |$element_state->{in_head_original}| and restored by
## |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        ## Second or later |title|.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{level}->{must});
    }

    ## Remember the previous value of the |in_head| flag only once.
    ## This handler runs for every child element; saving the flag again
    ## for a later child would overwrite the remembered value with the 1
    ## set below, so that |check_end| could no longer restore the flag.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'title',
                         level => $self->{level}->{must});
    }

    ## Restore the |in_head| flag only if this element changed it (i.e.
    ## at least one child element was seen).
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1937    
## The |title| element: text-only content, global attributes only.
$Element->{$HTML_NS}->{title} = +{
  %HTMLTextChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific attribute checkers
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
      dir => FEATURE_HTML5_REC,
      id => FEATURE_HTML5_REC,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
1951 wakaba 1.1
## The |base| element: empty content model.
##
## |check_attrs| reports a second |base| in the document, a |base href|
## that comes after a URL attribute, a |base target| that comes after a
## hyperlink element, and a |base| with neither |href| nor |target|.  It
## then runs the generic attribute checks.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    ## Reports a MUST-level error of the given type on the element.
    my $report = sub {
      my $error_type = shift;
      $self->{onerror}->(node => $base_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    };

    if ($self->{has_base}) {
      $report->('element not allowed:base');
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    ## ISSUE: Are these examples conforming?
    ## <head profile="a b c"><base href> (except for |profile|'s
    ## non-conformance)
    ## <title xml:base="relative"/><base href/> (maybe it should be)
    ## <unknown xmlns="relative"/><base href/> (assuming that
    ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
    ## <style>@import 'relative';</style><base href>
    ## <script>location.href = 'relative';</script><base href>
    ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
    ## an exception.
    $report->('basehref after URL attribute')
        if $self->{has_uri_attr} and $has_href;

    ## ISSUE: Are these examples conforming?
    ## <head><title xlink:href=""/><base target="name"/></head>
    ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
    ## (assuming that |xbl:xbl| is allowed before |base|)
    ## NOTE: These are non-conformant anyway because of |head|'s content model:
    ## <link href=""/><base target="name"/>
    ## <link rel=unknown href=""><base target=name>
    $report->('basetarget after hyperlink')
        if $self->{has_hyperlink_element} and $has_target;

    $report->('attribute missing:href|target')
        unless $has_href or $has_target;

    $element_state->{uri_info}{href}{type}{base} = 1;

    my %base_attr_checker = (
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    );
    my %base_attr_status = (
      %HTMLAttrStatus,
      href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    );
    return $GetHTMLAttrsChecker->(\%base_attr_checker, \%base_attr_status)
        ->($self, $item, $element_state);
  },
};
2016    
## The |link| element: empty content model.
##
## |check_attrs| first runs the per-attribute checks and then the checks
## that span several attributes: |href| and |rel| are required, |sizes|
## is only allowed with the |icon| link type, and an alternate style
## sheet needs a non-empty |title|.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $link_el = $item->{node};

    ## Set by the |sizes| checker when the attribute is present.
    my $sizes_attr;

    my %link_attr_checker = (
      charset => sub {
        my ($self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      sizes => sub {
        my ($self, $attr) = @_;
        $sizes_attr = $attr;
        ## Each space-separated token must be unique and be either |any|
        ## or of the form WIDTHxHEIGHT without leading zeros.
        my %seen;
        foreach my $token (grep {length $_}
                           split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
          if ($seen{$token}++) {
            $self->{onerror}->(node => $attr, type => 'duplicate token',
                               value => $token,
                               level => $self->{level}->{must});
            next;
          }
          next if $token eq 'any' or $token =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/;
          $self->{onerror}->(node => $attr,
                             type => 'sizes:syntax error',
                             value => $token,
                             level => $self->{level}->{must});
        }
      },
      target => $HTMLTargetAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    );
    my %link_attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
          ## NOTE: |charset| attribute had been part of HTML5 spec though
          ## it had been commented out.
      href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      methods => FEATURE_HTML20_RFC,
      rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      sizes => FEATURE_HTML5_LC,
      target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      # title: HTML5_WD | HTML5_LC | ...
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    );
    $GetHTMLAttrsChecker->(\%link_attr_checker, \%link_attr_status)
        ->($self, $item, $element_state);

    ## Reports a required attribute that is absent from the element.
    my $report_missing = sub {
      my $attr_name = shift;
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => $attr_name,
                         level => $self->{level}->{must});
    };

    if ($link_el->has_attribute_ns (undef, 'href')) {
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    } else {
      $report_missing->('href');
    }

    $report_missing->('rel') unless $link_el->has_attribute_ns (undef, 'rel');

    if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
      $self->{onerror}->(node => $sizes_attr,
                         type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }

    if ($element_state->{link_rel}->{alternate} and
        $element_state->{link_rel}->{stylesheet}) {
      my $title_attr = $link_el->get_attribute_node_ns (undef, 'title');
      if (not $title_attr) {
        $report_missing->('title');
      } elsif ($title_attr->value eq '') {
        $self->{onerror}->(node => $title_attr,
                           type => 'empty style sheet title',
                           level => $self->{level}->{must});
      }
    }
  },
};
2121    
2122     $Element->{$HTML_NS}->{meta} = {
2123 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2124 wakaba 1.40 %HTMLEmptyChecker,
2125     check_attrs => sub {
2126     my ($self, $item, $element_state) = @_;
2127 wakaba 1.1 my $name_attr;
2128     my $http_equiv_attr;
2129     my $charset_attr;
2130     my $content_attr;
2131 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2132 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2133     $attr_ns = '' unless defined $attr_ns;
2134     my $attr_ln = $attr->manakai_local_name;
2135     my $checker;
2136 wakaba 1.73 my $status;
2137 wakaba 1.1 if ($attr_ns eq '') {
2138 wakaba 1.73 $status = {
2139     %HTMLAttrStatus,
2140 wakaba 1.82 %XHTML2CommonAttrStatus,
2141 wakaba 1.153 charset => FEATURE_HTML5_WD,
2142     content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2143 wakaba 1.187 dir => FEATURE_HTML5_REC,
2144 wakaba 1.153 'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2145 wakaba 1.187 id => FEATURE_HTML5_REC,
2146     lang => FEATURE_HTML5_REC,
2147 wakaba 1.153 name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2148 wakaba 1.73 scheme => FEATURE_M12N10_REC,
2149     }->{$attr_ln};
2150    
2151 wakaba 1.1 if ($attr_ln eq 'content') {
2152     $content_attr = $attr;
2153     $checker = 1;
2154     } elsif ($attr_ln eq 'name') {
2155     $name_attr = $attr;
2156     $checker = 1;
2157     } elsif ($attr_ln eq 'http-equiv') {
2158     $http_equiv_attr = $attr;
2159     $checker = 1;
2160     } elsif ($attr_ln eq 'charset') {
2161     $charset_attr = $attr;
2162     $checker = 1;
2163 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
2164 wakaba 1.71 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2165 wakaba 1.67 $checker = sub {};
2166 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2167     $attr_ln !~ /[A-Z]/) {
2168 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
2169     $status = $HTMLDatasetAttrStatus;
2170 wakaba 1.1 } else {
2171     $checker = $HTMLAttrChecker->{$attr_ln}
2172 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
2173 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
2174     }
2175     } else {
2176     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2177 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
2178     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2179     || $AttrStatus->{$attr_ns}->{''};
2180     $status = FEATURE_ALLOWED if not defined $status;
2181 wakaba 1.1 }
2182 wakaba 1.62
2183 wakaba 1.1 if ($checker) {
2184 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
2185 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
2186 wakaba 1.54 #
2187 wakaba 1.1 } else {
2188 wakaba 1.104 $self->{onerror}->(node => $attr,
2189     type => 'unknown attribute',
2190     level => $self->{level}->{uncertain});
2191 wakaba 1.49 ## ISSUE: No conformance criteria for unknown attributes in the spec
2192     }
2193    
2194 wakaba 1.82 $self->_attr_status_info ($attr, $status);
2195 wakaba 1.1 }
2196    
2197     if (defined $name_attr) {
2198     if (defined $http_equiv_attr) {
2199     $self->{onerror}->(node => $http_equiv_attr,
2200 wakaba 1.104 type => 'attribute not allowed',
2201     level => $self->{level}->{must});
2202 wakaba 1.1 } elsif (defined $charset_attr) {
2203     $self->{onerror}->(node => $charset_attr,
2204 wakaba 1.104 type => 'attribute not allowed',
2205     level => $self->{level}->{must});
2206 wakaba 1.1 }
2207     my $metadata_name = $name_attr->value;
2208     my $metadata_value;
2209     if (defined $content_attr) {
2210     $metadata_value = $content_attr->value;
2211     } else {
2212 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2213 wakaba 1.104 type => 'attribute missing',
2214     text => 'content',
2215     level => $self->{level}->{must});
2216 wakaba 1.1 $metadata_value = '';
2217     }
2218     } elsif (defined $http_equiv_attr) {
2219     if (defined $charset_attr) {
2220     $self->{onerror}->(node => $charset_attr,
2221 wakaba 1.104 type => 'attribute not allowed',
2222     level => $self->{level}->{must});
2223 wakaba 1.1 }
2224     unless (defined $content_attr) {
2225 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2226 wakaba 1.104 type => 'attribute missing',
2227     text => 'content',
2228     level => $self->{level}->{must});
2229 wakaba 1.1 }
2230     } elsif (defined $charset_attr) {
2231     if (defined $content_attr) {
2232     $self->{onerror}->(node => $content_attr,
2233 wakaba 1.104 type => 'attribute not allowed',
2234     level => $self->{level}->{must});
2235 wakaba 1.1 }
2236     } else {
2237     if (defined $content_attr) {
2238     $self->{onerror}->(node => $content_attr,
2239 wakaba 1.104 type => 'attribute not allowed',
2240     level => $self->{level}->{must});
2241 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2242 wakaba 1.104 type => 'attribute missing:name|http-equiv',
2243     level => $self->{level}->{must});
2244 wakaba 1.1 } else {
2245 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2246 wakaba 1.104 type => 'attribute missing:name|http-equiv|charset',
2247     level => $self->{level}->{must});
2248 wakaba 1.1 }
2249     }
2250    
2251 wakaba 1.32 my $check_charset_decl = sub () {
2252 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
2253 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2254     for my $el (@{$parent->child_nodes}) {
2255     next unless $el->node_type == 1; # ELEMENT_NODE
2256 wakaba 1.40 unless ($el eq $item->{node}) {
2257 wakaba 1.29 ## NOTE: Not the first child element.
2258 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2259 wakaba 1.32 type => 'element not allowed:meta charset',
2260 wakaba 1.104 level => $self->{level}->{must});
2261 wakaba 1.29 }
2262     last;
2263     ## NOTE: Entity references are not supported.
2264     }
2265     } else {
2266 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2267 wakaba 1.32 type => 'element not allowed:meta charset',
2268 wakaba 1.104 level => $self->{level}->{must});
2269 wakaba 1.29 }
2270 wakaba 1.32 }; # $check_charset_decl
2271 wakaba 1.21
2272 wakaba 1.32 my $check_charset = sub ($$) {
2273     my ($attr, $charset_value) = @_;
2274 wakaba 1.21
2275 wakaba 1.91 my $charset;
2276     ($charset, $charset_value)
2277     = $HTMLCharsetChecker->($charset_value, $self, $attr);
2278    
2279 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
2280 wakaba 1.21 if (defined $ic) {
2281     ## TODO: Test for this case
2282     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2283     if ($charset ne $ic_charset) {
2284 wakaba 1.32 $self->{onerror}->(node => $attr,
2285 wakaba 1.104 type => 'mismatched charset name',
2286 wakaba 1.106 text => $ic,
2287 wakaba 1.104 value => $charset_value,
2288     level => $self->{level}->{must});
2289 wakaba 1.21 }
2290     } else {
2291     ## NOTE: MUST, but not checkable, since the document is not originally
2292     ## in serialized form (or the parser does not preserve the input
2293     ## encoding information).
2294 wakaba 1.32 $self->{onerror}->(node => $attr,
2295 wakaba 1.104 type => 'mismatched charset name not checked',
2296     value => $charset_value,
2297     level => $self->{level}->{uncertain});
2298 wakaba 1.21 }
2299    
2300 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
2301     $self->{onerror}->(node => $attr,
2302 wakaba 1.104 type => 'charref in charset',
2303     level => $self->{level}->{must},
2304     layer => 'syntax');
2305 wakaba 1.22 }
2306 wakaba 1.32 }; # $check_charset
2307    
2308     ## TODO: metadata conformance
2309    
2310     ## TODO: pragma conformance
2311     if (defined $http_equiv_attr) { ## An enumerated attribute
2312     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
2313 wakaba 1.33
2314 wakaba 1.85 if ($self->{has_http_equiv}->{$keyword}) {
2315     $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2316     node => $http_equiv_attr,
2317 wakaba 1.104 level => $self->{level}->{must});
2318 wakaba 1.85 } else {
2319     $self->{has_http_equiv}->{$keyword} = 1;
2320     }
2321    
2322     if ($keyword eq 'content-type') {
2323 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2324 wakaba 1.33
2325 wakaba 1.32 $check_charset_decl->();
2326 wakaba 1.182
2327     unless ($item->{node}->owner_document->manakai_is_html) {
2328     $self->{onerror}->(node => $item->{node},
2329     type => 'in XML:charset',
2330     level => $self->{level}->{must});
2331     }
2332    
2333 wakaba 1.32 if ($content_attr) {
2334     my $content = $content_attr->value;
2335 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2336 wakaba 1.132 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2337 wakaba 1.58 =(.+)\z!sx) {
2338 wakaba 1.32 $check_charset->($content_attr, $1);
2339     } else {
2340     $self->{onerror}->(node => $content_attr,
2341     type => 'meta content-type syntax error',
2342 wakaba 1.104 level => $self->{level}->{must});
2343 wakaba 1.85 }
2344     }
2345     } elsif ($keyword eq 'default-style') {
2346     ## ISSUE: Not defined yet in the spec.
2347     } elsif ($keyword eq 'refresh') {
2348     if ($content_attr) {
2349     my $content = $content_attr->value;
2350     if ($content =~ /\A[0-9]+\z/) {
2351     ## NOTE: Valid non-negative integer.
2352     #
2353 wakaba 1.132 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2354 wakaba 1.85 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
2355     Whatpm::URIChecker->check_iri_reference ($content, sub {
2356 wakaba 1.104 $self->{onerror}->(value => $content, @_, node => $content_attr);
2357 wakaba 1.106 }, $self->{level});
2358 wakaba 1.85 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URIs".
2359    
2360     $element_state->{uri_info}->{content}->{node} = $content_attr;
2361     $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2362     ## TODO: absolute
2363     push @{$self->{return}->{uri}->{$content} ||= []},
2364     $element_state->{uri_info}->{content};
2365     } else {
2366     $self->{onerror}->(node => $content_attr,
2367     type => 'refresh:syntax error',
2368 wakaba 1.104 level => $self->{level}->{must});
2369 wakaba 1.32 }
2370     }
2371     } else {
2372     $self->{onerror}->(node => $http_equiv_attr,
2373 wakaba 1.104 type => 'enumerated:invalid',
2374     level => $self->{level}->{must});
2375 wakaba 1.32 }
2376     }
2377    
2378     if (defined $charset_attr) {
2379 wakaba 1.182 my $value = $charset_attr->value;
2380    
2381 wakaba 1.32 $check_charset_decl->();
2382 wakaba 1.182 $check_charset->($charset_attr, $value);
2383    
2384     if (not $item->{node}->owner_document->manakai_is_html and
2385     not $value =~ /\A[Uu][Tt][Ff]-8\z/) {
2386     $self->{onerror}->(node => $item->{node},
2387     type => 'in XML:charset',
2388     level => $self->{level}->{must});
2389     }
2390 wakaba 1.1 }
2391     },
2392     };
2393    
## |style| element: embedded style sheet.  The content model depends
## on the styling language named by the |type| attribute, which is
## resolved once in |check_start| and consulted by the other callbacks.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    ## NOTE: |title| has special semantics for |style|s, but is
    ## syntactically not different, so that it needs no checker here.
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_HTML5_REC) } qw/dir id lang title/),
    (map {
      ($_ => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC)
    } qw/media type/),
    disabled => FEATURE_XHTML2_ED,
    href => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
    scoped => FEATURE_HTML5_FD,
  }),

  ## Determines the styling language and prepares the per-element state
  ## (|allow_element|, |style_type|, |uri_info|, and |text|).
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on content
    ## model.
    my $imt = $item->{node}->get_attribute_ns (undef, 'type');
    $imt = 'text/css' if not defined $imt;
    if ($imt =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      ## NOTE: Media types are ASCII case-insensitive.
      ($imt = "$1/$2") =~ tr/A-Z/a-z/;
    } else {
      ## NOTE: We don't know how parameters are handled by UAs.
      ## According to HTML5 specification, <style> with unknown
      ## parameters in |type=""| must be ignored.
      undef $imt;
    }

    ## Child elements are rejected only when the language is known to be
    ## CSS; an invalid |type=""| or an unknown language allows them.
    ## NOTE: There is no definition for "XML-based styling language" in
    ## HTML5, so that such types are not special-cased here.
    $element_state->{allow_element}
        = (defined $imt and $imt eq 'text/css') ? 0 : 1;
    $element_state->{style_type} = $imt;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;

    $element_state->{text} = '';
  },

  ## Reports a child element that is excluded by an ancestor or that is
  ## not acceptable for the styling language in use.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} &&
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));
    if ($excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln} and
             not $element_state->{allow_element}) {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## Accumulates the character data, which is handed over as a
  ## subdocument in |check_end|.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $child_node->data;
  },

  ## Hands the collected text over to |onsubdoc| unless the |type=""| is
  ## invalid or names an XML-based language.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    my $style_type = $element_state->{style_type};
    if (defined $style_type) { ## NOTE: Otherwise, invalid type=""
      if ($style_type =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of
        ## checker as part of normal tree validation.  However, we
        ## don't know of any XML-based styling language that can be used
        ## in HTML <style> element, such that we throw a "style language
        ## not supported" error.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML style lang',
                           text => $style_type,
                           level => $self->{level}->{uncertain});
      } else {
        ## NOTE: This covers 'text/css' as well as any other non-XML
        ## type.  Should we raise some kind of error for a misspelled
        ## type such as <style type="text/plaion">?
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $style_type,
                             is_char_string => 1});
      }
    }

    $HTMLChecker{check_end}->(@_);
  },
};
2499 wakaba 1.25 ## ISSUE: Relationship to significant content check?
2500 wakaba 1.1
## |body| element: flow content, with the window-reflecting event
## handler attributes and the deprecated HTML4 presentational attributes.
$Element->{$HTML_NS}->{body} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Color-valued presentational attributes.
    (map { ($_ => $HTMLColorAttrChecker) }
         qw/alink bgcolor link text vlink/),
    background => $HTMLURIAttrChecker,
    ## Event handler content attributes.
    (map { ($_ => $HTMLEventHandlerAttrChecker) } qw/
      onafterprint onbeforeprint onbeforeunload onblur onerror onfocus
      onhashchange onload onmessage onoffline ononline onpopstate onredo
      onresize onstorage onundo onunload
    /),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/alink background bgcolor link text vlink/),
    lang => FEATURE_HTML5_REC,
    ## Event handlers introduced by HTML5.
    (map { ($_ => FEATURE_HTML5_LC) } qw/
      onafterprint onbeforeprint onbeforeunload onblur onerror onfocus
      onhashchange onmessage onoffline ononline onpopstate onredo
      onresize onstorage onundo
    /),
    ## Event handlers that also have the M12N 1.0 REC status.
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_M12N10_REC) }
         qw/onload onunload/),
  }),

  ## Registers how URLs found in the attributes are used.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{background}->{type}->{embedded} = 1;
    $uri_info->{$_}->{type}->{resource} = 1 for qw/template ref/;
  },
};
2564    
## |section| element: flow content.  The |cite| attribute is still
## checked as a URL although its HTML5 status is "dropped".
$Element->{$HTML_NS}->{section} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      cite => FEATURE_HTML5_DROPPED | FEATURE_XHTML2_ED,
    },
  ),
};
2576    
## |nav| element: flow content; no element-specific attributes.
$Element->{$HTML_NS}->{nav}
    = {status => FEATURE_HTML5_LC, %HTMLFlowContentChecker};
2581    
## |article| element: flow content, with |pubdate| (a global date and
## time string) and the dropped |cite| attribute.
$Element->{$HTML_NS}->{article} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      pubdate => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      cite => FEATURE_HTML5_DROPPED,
      pubdate => FEATURE_HTML5_LC,
    },
  ),
}; # article
2594 wakaba 1.1
## |blockquote| element: flow content; |cite| is the URL of the source
## of the quotation.
$Element->{$HTML_NS}->{blockquote} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLFlowContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      cite => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),

  ## Registers how URLs found in the attributes are used.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{cite}->{type}->{cite} = 1;
    $uri_info->{$_}->{type}->{resource} = 1 for qw/template ref/;
  },
};
2616    
## |aside| element: flow content; no element-specific attributes.
$Element->{$HTML_NS}->{aside}
    = {status => FEATURE_HTML5_LC, %HTMLFlowContentChecker};
2621    
## |h1| element: phrasing content.  This definition is also the
## template from which |h2|..|h6| are copied.
$Element->{$HTML_NS}->{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->(
        +{map { ($_ => 1) } qw/left center right justify/},
      ),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),

  ## Records that a heading has been seen (the |header| checker reads
  ## this flag) and registers how URLs in the attributes are used.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->{flag}->{has_hn} = 1;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
};
2644    
## |h2|..|h6| are shallow copies of the |h1| definition, such that each
## of them has its own top-level hash while sharing the callbacks.
for my $heading (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}->{$heading} = {%{$Element->{$HTML_NS}->{h1}}};
}
2654 wakaba 1.1
2655 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
2656 wakaba 1.174
2657     # XXX footer in header is disallowed (HTML5 revision 3050)
2658 wakaba 1.29
## |header| element: flow content that excludes |header|, |footer|, and
## sectioning content, and for which a heading is required.
$Element->{$HTML_NS}->{header} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,

  ## Installs the exclusions and resets the "heading seen" flag, saving
  ## the outer value such that it can be restored in |check_end|.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {qw/header 1 footer 1/}},
                                $HTMLSectioningContent);

    my $flag = $self->{flag};
    $element_state->{has_hn_original} = $flag->{has_hn};
    $flag->{has_hn} = 0;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },

  ## Removes the exclusions and reports the lack of a heading.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    if (not $self->{flag}->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn',
                         level => $self->{level}->{must});
      ## No heading inside; fall back to what the outer context has seen.
      $self->{flag}->{has_hn} = $element_state->{has_hn_original};
    }

    $HTMLFlowContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
2687    
## |footer| element: flow content that excludes |header|, |footer|,
## sectioning content, and heading content.
$Element->{$HTML_NS}->{footer} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,

  ## Installs the exclusions for the duration of the element.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded = {$HTML_NS => {header => 1, footer => 1}};
    $self->_add_minus_elements
        ($element_state, $excluded,
         $HTMLSectioningContent, $HTMLHeadingContent);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },

  ## Removes the exclusions and then runs the generic flow content check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2708    
## |address| element: flow content that excludes |header|, |footer|,
## |address|, sectioning content, and heading content.
$Element->{$HTML_NS}->{address} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: add test
    #align => $GetHTMLEnumeratedAttrChecker->({
    #  left => 1, center => 1, right => 1, justify => 1,
    #}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_REC,
    (map { ($_ => FEATURE_HTML20_RFC) } qw/sdaform sdapref/),
  }),

  ## Installs the exclusions for the duration of the element.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded = {$HTML_NS => {header => 1, footer => 1, address => 1}};
    $self->_add_minus_elements
        ($element_state, $excluded,
         $HTMLSectioningContent, $HTMLHeadingContent);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },

  ## Removes the exclusions and then runs the generic flow content check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2742    
## |p| element: phrasing content, with the deprecated |align|
## enumerated attribute.
$Element->{$HTML_NS}->{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->(
        +{map { ($_ => 1) } qw/left center right justify/},
      ),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2758    
## |hr| element: empty content model.  The HTML4 presentational
## attributes have status entries only; no value checker is defined.
$Element->{$HTML_NS}->{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: HTML4 |align|, |noshade|, |size|, |width|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/align noshade size width/),
    lang => FEATURE_HTML5_REC,
    sdapref => FEATURE_HTML20_RFC,
  }),
};
2775    
## |br| element: empty content model, with the deprecated |clear|
## enumerated attribute.
$Element->{$HTML_NS}->{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      clear => $GetHTMLEnumeratedAttrChecker->(
        +{map { ($_ => 1) } qw/left all right none/},
      ),
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_REC) } qw/id style title/),
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      clear => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
  ## NOTE: Blank line MUST NOT be used for presentation purpose.
  ## (This requirement is semantic so that we cannot check.)
};
2795    
## |dialog| element: zero or more pairs of |dt| and |dd|.  The |phase|
## entry of the element state names the element that is expected next.
$Element->{$HTML_NS}->{dialog} = {
  status => FEATURE_HTML5_WD,
  %HTMLChecker,

  ## Starts by expecting a |dt| and registers how URLs are used.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },

  ## Enforces the strict alternation of |dt| and |dd| children.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} &&
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));
    if ($excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      ## For each phase: the element wanted now, the other element of
      ## the pair, and the phase to enter once the wanted one is seen.
      my $rule = {
        'before dt' => {want => 'dt', other => 'dd', next => 'before dd'},
        'before dd' => {want => 'dd', other => 'dt', next => 'before dt'},
      }->{$element_state->{phase}}
          or die "check_child_element: Bad |dialog| phase: $element_state->{phase}";

      if ($child_nsuri eq $HTML_NS and $child_ln eq $rule->{want}) {
        $element_state->{phase} = $rule->{next};
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq $rule->{other}) {
        ## The wanted element has been skipped.  The phase is left
        ## unchanged, i.e. the wanted element is still expected.
        $self->{onerror}
            ->(node => $child_el, type => 'ps element missing',
               text => $rule->{want},
               level => $self->{level}->{must});
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },

  ## Significant text is not allowed as a direct child.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },

  ## Reports a trailing |dt| that has no corresponding |dd|.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'dd',
                       level => $self->{level}->{must})
        if $element_state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  },
};
2865    
## |pre| element: phrasing content.  In addition, the text content of a
## |pre| element whose |class| attribute contains the token |idl| or
## |idl-code| is handed over to |onsubdoc| as Web IDL.
$Element->{$HTML_NS}->{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),

  ## Submits IDL fragments for checking and then runs the generic
  ## phrasing content check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## TODO: Flag to enable/disable IDL checking?
    my $class = $item->{node}->get_attribute_ns (undef, 'class');
    $class = '' unless defined $class; # no |class| attribute at all

    ## The |class| attribute is a set of space-separated tokens, so that
    ## the tokens have to be compared as a whole.  A |\b|-anchored
    ## pattern would also match class names that merely contain "idl" as
    ## a hyphen-delimited part, such as |non-idl| or |idl-foo|.
    my $is_idl = grep { $_ eq 'idl' or $_ eq 'idl-code' }
        split /[\x09\x0A\x0C\x0D\x20]+/, $class;
    if ($is_idl) {
      ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
      ## NOTE: pre.code > code.idl-code: WebIDL spec
      ## NOTE: pre.idl-code: DOM1 spec
      ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
      ## NOTE: pre.schema: ReSpec-generated specs
      $self->{onsubdoc}->({s => $item->{node}->text_content,
                           container_node => $item->{node},
                           media_type => 'text/x-webidl',
                           is_char_string => 1});
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2898    
## |ol| element: zero or more |li| children and no significant text.
## The |ul| definition is derived from this one.
$Element->{$HTML_NS}->{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    reversed => $GetHTMLBooleanAttrChecker->('reversed'),
    start => $HTMLIntegerAttrChecker,
    ## TODO: HTML4 |type|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) } qw/compact type/),
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_REC,
    reversed => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    #start => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    start => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),

  ## Only |li| elements, or elements explicitly allowed by an ancestor,
  ## are accepted as children.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} &&
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));
    if ($excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln} and
             not ($child_nsuri eq $HTML_NS and $child_ln eq 'li')) {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## Significant text is not allowed as a direct child.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
2944    
## |ul| element: shares the content model callbacks of |ol|, with its
## own status and attribute set.
$Element->{$HTML_NS}->{ul} = {
  %{$Element->{$HTML_NS}->{ol}},
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## TODO: HTML4 |type|
    ## TODO: sdaform, align
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) } qw/compact type/),
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),
};
2962    
## |dir| element (deprecated): shares the content model callbacks of
## |ul|, with its own status and attribute set.
$Element->{$HTML_NS}->{dir} = {
  ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
  %{$Element->{$HTML_NS}->{ul}},
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {compact => $GetHTMLBooleanAttrChecker->('compact')},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_HTML20_RFC) } qw/sdaform sdapref/),
      align => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
    },
  ),
};
2979    
## |li| element: flow content in general, but phrasing content while
## the |in_menu| flag is set.
$Element->{$HTML_NS}->{li} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: HTML4 |type|

    ## |value| is an integer and is expected only when the parent
    ## element is an HTML |ol| element.
    value => sub {
      my ($self, $attr) = @_;

      my $in_ol = 0;
      my $container = $attr->owner_element->manakai_parent_element;
      if (defined $container) {
        my $ns = $container->namespace_uri;
        my $ln = $container->manakai_local_name;
        $in_ol = 1 if defined $ns and $ns eq $HTML_NS and $ln eq 'ol';
      }

      if (not $in_ol) {
        ## ISSUE: No "MUST" in the spec.
        $self->{onerror}->(node => $attr,
                           type => 'non-ol li value',
                           level => $self->{level}->{html5_fact});
      }

      $HTMLIntegerAttrChecker->($self, $attr);
    },
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    type => FEATURE_M12N10_REC_DEPRECATED,
    #value => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
    #    FEATURE_M12N10_REC_DEPRECATED,
    value => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
        FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
  }),

  ## Delegates to the phrasing or flow content checker, depending on
  ## the |in_menu| flag.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## TODO: In <dir> element, then ...
    my $model = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
    $model->{check_child_element}->(@_);
  },

  ## Delegates to the phrasing or flow content checker, depending on
  ## the |in_menu| flag.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## TODO: In <dir> element, then ...
    my $model = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
    $model->{check_child_text}->(@_);
  },
};
3038    
## |dl|: the children are checked as a sequence of |dt| / |dd| elements.
## |$element_state->{phase}| tracks the position in that sequence and is
## one of 'before dt', 'in dts' or 'in dds'.
$Element->{$HTML_NS}{dl} = {
  %HTMLChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
      type => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor; nothing to check here.
    } else {
      my $phase = $element_state->{phase};
      unless ($phase eq 'before dt' or
              $phase eq 'in dts' or
              $phase eq 'in dds') {
        die "check_child_element: Bad |dl| phase: $element_state->{phase}";
      }

      ## In every phase a |dt| moves to 'in dts' and a |dd| moves to
      ## 'in dds'; the only phase-specific behavior is the error for a
      ## |dd| that is not preceded by any |dt|.
      my $is_html = $child_nsuri eq $HTML_NS;
      if ($is_html and $child_ln eq 'dt') {
        $element_state->{phase} = 'in dts';
      } elsif ($is_html and $child_ln eq 'dd') {
        if ($phase eq 'before dt') {
          $self->{onerror}->(node => $child_el,
                             type => 'ps element missing',
                             text => 'dt',
                             level => $self->{level}->{must});
        }
        $element_state->{phase} = 'in dds';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must});
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing run of |dt| elements has no |dd| to go with it.
    if ($element_state->{phase} eq 'in dts') {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'dd',
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
3124    
## |dt|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{dt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3135    
## |dd|: based on the flow content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{dd} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3146    
## |a|: based on the transparent checker.  Interactive content is added to
## the "minus" set for the duration of the element, and the |in_a_href|
## flag is set while inside an |a| element that has an |href| attribute.
$Element->{$HTML_NS}->{a} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Lookup tables for null-namespace attributes.  They do not depend
    ## on the attribute being examined, so they are built once per
    ## element rather than once per attribute.
    my $local_status = {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_WD,
      rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    };
    my $local_checker = {
      charset => sub {
        my ($self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      ## TODO: HTML4 |coords|
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      ## TODO: HTML4 |shape|
      media => $HTMLMQAttrChecker,
      ## TODO: HTML4/XHTML1 |name|
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    ## Attributes handled by |$local_checker|, keyed by local name; used
    ## after the loop for the |href|-dependent checks.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $status = $local_status->{$attr_ln};
        $checker = $local_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the namespace-wide tables.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    ## Remember the outer value so that |check_end| can restore it.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## These attributes are reported when |href| is absent.
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLTransparentChecker{check_end}->(@_);
  },
};
3269    
## |q|: based on the phrasing content checker; |cite| is checked as a URI.
## |status| is listed after the base checker, as in the sibling element
## definitions, so that it cannot be shadowed by a key of the base hash.
$Element->{$HTML_NS}->{q} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    cite => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    sdapref => FEATURE_HTML2X_RFC,
    sdasuff => FEATURE_HTML2X_RFC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
## TODO: "Quotation punctuation (such as quotation marks), if any, must be
## placed inside the <code>q</code> element."  Though we cannot test the
## element against this requirement since it includes a semantic bit,
## it might be possible to report the existence of quotation marks OUTSIDE
## the |q| element.
3296 wakaba 1.1
## |cite|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{cite} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3307    
## |em|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{em} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3318    
## |strong|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{strong} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3329    
## |small|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
};
3339    
## |big|: based on the phrasing content checker.  The element status
## carries no HTML5 flag, only FEATURE_M12N10_REC.
$Element->{$HTML_NS}{big} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
};
3349    
## |mark|: based on the phrasing content checker, with the attribute
## checker inherited from that base.  |status| is listed after the base
## checker, as in the sibling element definitions, so that it cannot be
## shadowed by a key of the base hash.
$Element->{$HTML_NS}->{mark} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
};
3354    
## |dfn|: based on the phrasing content checker; nested |dfn| elements are
## added to the "minus" set.  The defined term is recorded in
## |$self->{term}| (term string => array reference of |dfn| nodes).
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    ## Determine the defined term:
    ##   1. the |title| attribute of the |dfn|, if any;
    ##   2. otherwise, if the |dfn| has exactly one element child, that
    ##      child is an HTML |abbr| with a |title| attribute, and there
    ##      is no text child other than inter-element whitespace, the
    ##      value of that |title| attribute;
    ##   3. otherwise, the text content of the |dfn|.
    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      my $element_children = 0;
      for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          if (++$element_children > 1) {
            ## More than one element child: case 2 does not apply, even
            ## if an earlier child was not an |abbr| with |title|.
            undef $term;
            last;
          }
          if ($child->manakai_local_name eq 'abbr') {
            my $nsuri = $child->namespace_uri;
            if (defined $nsuri and $nsuri eq $HTML_NS) {
              my $attr = $child->get_attribute_node_ns (undef, 'title');
              $term = $attr->value if $attr;
            }
          }
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next if $child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/; # Inter-element whitespace
          undef $term;
          last;
        }
      }
      $term = $node->text_content unless defined $term;
    }
    push @{$self->{term}->{$term} ||= []}, $node;
    ## ISSUE: The HTML5 definition for the defined term does not work with
    ## |ruby| unless |dfn| has |title|.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3415    
## |abbr|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      full => FEATURE_XHTML2_ED,
      lang => FEATURE_HTML5_REC,
    },
  ),
  ## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
  ## number (plural vs singular) must match the grammatical number of the
  ## contents of the element."  Though this can be checked by machine,
  ## it requires language-specific knowledge and a dictionary, so
  ## we don't support checking this requirement.
  ## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
};
3432    
## |acronym|: based on the phrasing content checker.  The element status
## carries no HTML5 flag, only FEATURE_M12N10_REC.
$Element->{$HTML_NS}{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
};
3442    
## |time|: based on the phrasing content checker.  The date or time
## string is taken from the |datetime| attribute if present, otherwise
## from the text content of the element, and is validated in |check_end|.
## |status| is listed after the base checker, as in the sibling element
## definitions, so that it cannot be shadowed by a key of the base hash.
$Element->{$HTML_NS}->{time} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # value is validated in |check_end|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_FD,
  }),
  ## TODO: Update definition
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;
    my $reg_sp;     # white space allowed around and between the parts
    my $input_node; # node against which errors are reported
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09\x0A\x0C\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{WhiteSpace}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## returns some date or time, is it "successfully"?
    }

    ## All errors raised here share the same node and level.
    my $report = sub {
      $self->{onerror}->(node => $input_node,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    my $hour;
    my $minute;
    my $second;
    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      ## Copy the capture groups immediately so that the checks below do
      ## not depend on |$1|..|$10| staying untouched.
      my ($first, $month, $day, $dt_hour, $dt_minute, $dt_second,
          $tz_hour, $tz_minute, $t_minute, $t_second)
          = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);

      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        my $year = $first;
        if (length ($year) != 4 or length ($month) != 2 or
            length ($day) != 2 or
            length ($dt_hour) != 2 or length ($dt_minute) != 2) {
          $report->('dateortime:syntax error');
        }

        if (1 <= $month and $month <= 12) {
          ## Index 0 is unused; February allows 29 and the leap year
          ## rule is applied separately.
          my @max_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
          $report->('datetime:bad day')
              if $day < 1 or $day > $max_day[$month];
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and
                 not ($year % 400 == 0 or
                      ($year % 4 == 0 and $year % 100 != 0));
        } else {
          $report->('datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          if (length ($tz_hour) != 2 or length ($tz_minute) != 2) {
            $report->('dateortime:syntax error');
          }

          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        if (length ($first) != 2 or length ($t_minute) != 2) {
          $report->('dateortime:syntax error');
        }

        ($hour, $minute, $second) = ($first, $t_minute, $t_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: Integer part of second doesn't have to have length of two.

        ## The pattern tolerates a missing integer part, which is
        ## reported as a syntax error.
        if (substr ($second, 0, 1) eq '.') {
          $report->('dateortime:syntax error');
        }

        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3591    
## |meter|: based on the phrasing content checker.  Each of the six
## numeric attributes gets its own floating point number checker whose
## callback always returns true.
## TODO: "The recommended way of giving the value is to include it as contents of the element"
## TODO: value inequalities (HTML5 revision 1463)
## TODO: content checking
## TODO: content or value must contain number (rev 2053)
$Element->{$HTML_NS}{meter} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      map { ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 })) }
          qw/value min low high max optimum/
    },
    {
      %HTMLAttrStatus,
      map { ($_ => FEATURE_HTML5_DEFAULT) }
          qw/high low max min optimum value/
    },
  ),
};
3615    
## |progress|: based on the phrasing content checker.  The callbacks
## passed to the number checker test |value| >= 0 and |max| > 0.
## TODO: recommended to use content
$Element->{$HTML_NS}{progress} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
      max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
    },
    {
      %HTMLAttrStatus,
      max => FEATURE_HTML5_DEFAULT,
      value => FEATURE_HTML5_DEFAULT,
    },
  ),
};
3628    
## |code|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{code} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3639    
## |var|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{var} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3650    
## |samp|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{samp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3661    
## |kbd|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{kbd} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3672    
## |sub|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{'sub'} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdapref => FEATURE_HTML2X_RFC,
    },
  ),
};

## |sup| shares the very same definition hash as |sub|.
$Element->{$HTML_NS}{sup} = $Element->{$HTML_NS}{'sub'};
3685 wakaba 1.1
## |span|: based on the phrasing content checker.  The |datafld|,
## |dataformatas| and |datasrc| attributes are listed with the HTML4
## "reserved" status.
$Element->{$HTML_NS}{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML2X_RFC,
    },
  ),
};
3699    
## |i|: based on the phrasing content checker.  No element-specific
## attribute checkers; only status entries are added.
$Element->{$HTML_NS}{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};

## |b| shares the very same definition hash as |i|.
$Element->{$HTML_NS}{b} = $Element->{$HTML_NS}{i};
3712    
## |tt|: based on the phrasing content checker.  The element status
## carries no HTML5 flag, only FEATURE_M12N10_REC.
$Element->{$HTML_NS}{tt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3723 wakaba 1.51
## |s|: based on the phrasing content checker.  The element status is
## FEATURE_M12N10_REC_DEPRECATED.
$Element->{$HTML_NS}{'s'} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
};

## |strike| and |u| share the very same definition hash as |s|.
$Element->{$HTML_NS}{strike} = $Element->{$HTML_NS}{'s'};

$Element->{$HTML_NS}{u} = $Element->{$HTML_NS}{'s'};
3737    
## |bdo|: based on the phrasing content checker.  In addition to the
## ordinary attribute checks, the |dir| attribute is required.
{
  ## The generic attribute checker is built once here, rather than on
  ## every invocation of |check_attrs|.
  my $check_bdo_attrs = $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    style => FEATURE_HTML5_REC,
    title => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    sdapref => FEATURE_HTML2X_RFC,
    sdasuff => FEATURE_HTML2X_RFC,
  });

  $Element->{$HTML_NS}->{bdo} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => sub {
      my ($self, $item, $element_state) = @_;
      $check_bdo_attrs->($self, $item, $element_state);
      unless ($item->{node}->has_attribute_ns (undef, 'dir')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => 'dir',
                           level => $self->{level}->{must});
      }
    },
    ## ISSUE: The spec does not directly say that |dir| is an enumerated
    ## attribute.
  };
}
3763    
3764 wakaba 1.99 $Element->{$HTML_NS}->{ruby} = {
3765     %HTMLPhrasingContentChecker,
3766 wakaba 1.187 status => FEATURE_HTML5_WD | FEATURE_RUBY_REC,
3767 wakaba 1.99 check_attrs => $GetHTMLAttrsChecker->({}, {
3768     %HTMLAttrStatus,
3769     %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
3770 wakaba 1.187 lang => FEATURE_HTML5_REC,
3771 wakaba 1.99 }),
3772     check_start => sub {
3773     my ($self, $item, $element_state) = @_;
3774    
3775     $element_state->{phase} = 'before-rb';
3776     #$element_state->{has_sig}
3777 wakaba 1.100
3778     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3779     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3780 wakaba 1.99 },
3781     ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
## Child-element hook of the |ruby| checker.  Drives a small state
## machine stored in |$element_state->{phase}|; the phases used here are
## 'before-rb', 'in-rb', 'after-rt', 'after-rp1', 'after-rp-rt' and
## 'after-rp2'.  Children that break the expected order are reported
## through |$self->{onerror}|, and the phase is then moved on so that
## checking can continue with the following siblings.
3782     check_child_element => sub {
3783     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3784     $child_is_transparent, $element_state) = @_;
## Excluded ("minus") interactive content is a must-level error;
## explicitly added ("plus") elements are accepted and leave the phase
## untouched.
3785 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
3786     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
3787 wakaba 1.99 $self->{onerror}->(node => $child_el,
3788     type => 'element not allowed:minus',
3789 wakaba 1.104 level => $self->{level}->{must});
3790 wakaba 1.99 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3791     #
## Phase 'before-rb': nothing has been accepted as ruby base yet, so an
## |rt| or |rp| here has no base content in front of it.
3792     } elsif ($element_state->{phase} eq 'before-rb') {
3793     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3794     $element_state->{phase} = 'in-rb';
3795     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3796     $self->{onerror}->(node => $child_el,
3797 wakaba 1.104 level => $self->{level}->{should},
3798     type => 'no significant content before');
3799 wakaba 1.99 $element_state->{phase} = 'after-rt';
3800     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3801     $self->{onerror}->(node => $child_el,
3802 wakaba 1.104 level => $self->{level}->{should},
3803     type => 'no significant content before');
3804 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3805     } else {
3806     $self->{onerror}->(node => $child_el,
3807 wakaba 1.104 type => 'element not allowed:ruby base',
3808     level => $self->{level}->{must});
3809 wakaba 1.99 $element_state->{phase} = 'in-rb';
3810     }
## Phase 'in-rb': inside the ruby base.  An |rt| or |rp| closes the base
## and is only warned about when the base had no significant content.
3811     } elsif ($element_state->{phase} eq 'in-rb') {
3812     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3813     #$element_state->{phase} = 'in-rb';
3814     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3815     unless ($element_state->{has_significant}) {
3816     $self->{onerror}->(node => $child_el,
3817 wakaba 1.104 level => $self->{level}->{should},
3818     type => 'no significant content before');
3819 wakaba 1.99 }
3820     $element_state->{phase} = 'after-rt';
3821     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3822     unless ($element_state->{has_significant}) {
3823     $self->{onerror}->(node => $child_el,
3824 wakaba 1.104 level => $self->{level}->{should},
3825     type => 'no significant content before');
3826 wakaba 1.99 }
3827     $element_state->{phase} = 'after-rp1';
3828     } else {
3829     $self->{onerror}->(node => $child_el,
3830 wakaba 1.104 type => 'element not allowed:ruby base',
3831     level => $self->{level}->{must});
3832 wakaba 1.99 #$element_state->{phase} = 'in-rb';
3833     }
## Phase 'after-rt': a bare |rt| has just been seen.  Whenever a new base
## starts (here and in the later phases), the |has_significant| flag is
## moved into |has_sig| and cleared, so that the 'in-rb' test above looks
## only at the new base while |has_sig| remembers the earlier content.
3834     } elsif ($element_state->{phase} eq 'after-rt') {
3835     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3836     if ($element_state->{has_significant}) {
3837     $element_state->{has_sig} = 1;
3838     delete $element_state->{has_significant};
3839     }
3840     $element_state->{phase} = 'in-rb';
3841     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3842     $self->{onerror}->(node => $child_el,
3843 wakaba 1.104 level => $self->{level}->{should},
3844     type => 'no significant content before');
3845 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3846     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3847     $self->{onerror}->(node => $child_el,
3848 wakaba 1.104 level => $self->{level}->{should},
3849     type => 'no significant content before');
3850 wakaba 1.99 #$element_state->{phase} = 'after-rt';
3851     } else {
3852     $self->{onerror}->(node => $child_el,
3853 wakaba 1.104 type => 'element not allowed:ruby base',
3854     level => $self->{level}->{must});
3855 wakaba 1.99 if ($element_state->{has_significant}) {
3856     $element_state->{has_sig} = 1;
3857     delete $element_state->{has_significant};
3858     }
3859     $element_state->{phase} = 'in-rb';
3860     }
## Phase 'after-rp1': the opening |rp| has been seen, so an |rt| is
## expected next.  Anything else reports the missing |rt| (and, when the
## child is not an |rp| either, the missing closing |rp| as well).
3861     } elsif ($element_state->{phase} eq 'after-rp1') {
3862     if ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3863     $element_state->{phase} = 'after-rp-rt';
3864     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3865     $self->{onerror}->(node => $child_el,
3866 wakaba 1.104 type => 'ps element missing',
3867     text => 'rt',
3868     level => $self->{level}->{must});
3869 wakaba 1.99 $element_state->{phase} = 'after-rp2';
3870     } else {
3871     $self->{onerror}->(node => $child_el,
3872 wakaba 1.104 type => 'ps element missing',
3873     text => 'rt',
3874     level => $self->{level}->{must});
3875 wakaba 1.99 $self->{onerror}->(node => $child_el,
3876 wakaba 1.104 type => 'ps element missing',
3877     text => 'rp',
3878     level => $self->{level}->{must});
3879 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3880     $self->{onerror}->(node => $child_el,
3881 wakaba 1.104 type => 'element not allowed:ruby base',
3882     level => $self->{level}->{must});
3883 wakaba 1.99 }
3884     if ($element_state->{has_significant}) {
3885     $element_state->{has_sig} = 1;
3886     delete $element_state->{has_significant};
3887     }
3888     $element_state->{phase} = 'in-rb';
3889     }
## Phase 'after-rp-rt': |rp| and |rt| have been seen, so the closing |rp|
## is expected next.
3890     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3891     if ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3892     $element_state->{phase} = 'after-rp2';
3893     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3894     $self->{onerror}->(node => $child_el,
3895 wakaba 1.104 type => 'ps element missing',
3896     text => 'rp',
3897     level => $self->{level}->{must});
3898 wakaba 1.99 $self->{onerror}->(node => $child_el,
3899 wakaba 1.104 level => $self->{level}->{should},
3900     type => 'no significant content before');
3901 wakaba 1.99 $element_state->{phase} = 'after-rt';
3902     } else {
3903     $self->{onerror}->(node => $child_el,
3904 wakaba 1.104 type => 'ps element missing',
3905     text => 'rp',
3906     level => $self->{level}->{must});
3907 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3908     $self->{onerror}->(node => $child_el,
3909 wakaba 1.104 type => 'element not allowed:ruby base',
3910     level => $self->{level}->{must});
3911 wakaba 1.99 }
3912     if ($element_state->{has_significant}) {
3913     $element_state->{has_sig} = 1;
3914     delete $element_state->{has_significant};
3915     }
3916     $element_state->{phase} = 'in-rb';
3917     }
## Phase 'after-rp2': a complete |rp|, |rt|, |rp| group has been closed;
## the branches below mirror those of 'after-rt'.
3918     } elsif ($element_state->{phase} eq 'after-rp2') {
3919     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3920     if ($element_state->{has_significant}) {
3921     $element_state->{has_sig} = 1;
3922     delete $element_state->{has_significant};
3923     }
3924     $element_state->{phase} = 'in-rb';
3925     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3926     $self->{onerror}->(node => $child_el,
3927 wakaba 1.104 level => $self->{level}->{should},
3928     type => 'no significant content before');
3929 wakaba 1.99 $element_state->{phase} = 'after-rt';
3930     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3931     $self->{onerror}->(node => $child_el,
3932 wakaba 1.104 level => $self->{level}->{should},
3933     type => 'no significant content before');
3934 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3935     } else {
3936     $self->{onerror}->(node => $child_el,
3937 wakaba 1.104 type => 'element not allowed:ruby base',
3938     level => $self->{level}->{must});
3939 wakaba 1.99 if ($element_state->{has_significant}) {
3940     $element_state->{has_sig} = 1;
3941     delete $element_state->{has_significant};
3942     }
3943     $element_state->{phase} = 'in-rb';
3944     }
## Any other phase value means the state machine itself is broken.
3945     } else {
3946     die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
3947     }
3948     },
check_child_text => sub {
  ## Text-node hook of the |ruby| checker.  Only significant text is of
  ## interest: it always starts (or continues) a ruby base, so the
  ## phase ends up as 'in-rb'.  When the text interrupts an unfinished
  ## |rp|, |rt|, |rp| group, the members of the group that are still
  ## missing are reported first.
  my ($self, $item, $child_node, $has_significant, $element_state) = @_;
  return unless $has_significant;

  ## Elements that are still owed when significant text appears in
  ## each known phase.
  my %owed_in_phase = (
    'before-rb'   => [],
    'in-rb'       => [],
    'after-rt'    => [],
    'after-rp2'   => [],
    'after-rp1'   => [qw/rt rp/],
    'after-rp-rt' => [qw/rp/],
  );
  my $owed = $owed_in_phase{$element_state->{phase}}
      or die "check_child_text: Bad |ruby| phase: $element_state->{phase}";

  for my $missing_ln (@$owed) {
    $self->{onerror}->(node => $child_node,
                       type => 'ps element missing',
                       text => $missing_ln,
                       level => $self->{level}->{must});
  }
  $element_state->{phase} = 'in-rb';
},
check_end => sub {
  ## End-of-element hook of the |ruby| checker.  Reports what is still
  ## missing according to the phase in which the element ended, then
  ## hands the "has significant content" flag up to the parent state.
  my ($self, $item, $element_state) = @_;
  $self->_remove_minus_elements ($element_state);

  if ($element_state->{phase} eq 'before-rb') {
    ## Nothing at all was accepted as ruby base.
    $self->{onerror}->(node => $item->{node},
                       level => $self->{level}->{should},
                       type => 'no significant content');
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($element_state->{phase} eq 'in-rb') {
    ## A ruby base was left open without any ruby text after it.
    unless ($element_state->{has_significant}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content at the end');
    }
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($element_state->{phase} eq 'after-rt' or
           $element_state->{phase} eq 'after-rp2') {
    ## The last group is complete; nothing to report.
  } elsif ($element_state->{phase} eq 'after-rp1') {
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } elsif ($element_state->{phase} eq 'after-rp-rt') {
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } else {
    ## The message names this hook (it used to say |check_child_text|,
    ## which pointed at the wrong callback).
    die "check_end: Bad |ruby| phase: $element_state->{phase}";
  }

  ## NOTE: A modified version of |check_end| of %AnyChecker.
  ## |has_sig| carries significance recorded for earlier ruby bases,
  ## |has_significant| the one for the current base.
  if ($element_state->{has_significant} or $element_state->{has_sig}) {
    $item->{real_parent_state}->{has_significant} = 1;
  }
},
4028     };
4029    
## |rt| element: inherits every hook from the phrasing content checker
## and only supplies its own status and attribute status table.
$Element->{$HTML_NS}{rt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_RUBY_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},  # no element-specific attribute checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
}; # rt
4039    
## |rp| element: inherits every hook from the phrasing content checker
## and only supplies its own status and attribute status table.
$Element->{$HTML_NS}{rp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_RUBY_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},  # no element-specific attribute checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
}; # rp
4049 wakaba 1.99
4050 wakaba 1.29 =pod
4051    
4052     ## TODO:
4053    
4054     +
4055     + <p>Partly because of the confusion described above, authors are
4056     + strongly recommended to always mark up all paragraphs with the
4057     + <code>p</code> element, and to not have any <code>ins</code> or
4058     + <code>del</code> elements that cross across any <span
4059     + title="paragraph">implied paragraphs</span>.</p>
4060     +
4061     (An informative note)
4062    
4063     <p><code>ins</code> elements should not cross <span
4064     + title="paragraph">implied paragraph</span> boundaries.</p>
4065     (normative)
4066    
4067     + <p><code>del</code> elements should not cross <span
4068     + title="paragraph">implied paragraph</span> boundaries.</p>
4069     (normative)
4070    
4071     =cut
4072    
## |ins| element: transparent content model with |cite| and |datetime|
## attributes.
$Element->{$HTML_NS}{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record, per URI-valued attribute, the kind of reference it makes.
    for my $pair ([cite => 'cite'],
                  [template => 'resource'],
                  [ref => 'resource']) {
      my ($attr_ln, $uri_type) = @$pair;
      $element_state->{uri_info}->{$attr_ln}->{type}->{$uri_type} = 1;
    }
  },
}; # ins
4094    
## |del| element: transparent content model with |cite| and |datetime|
## attributes.
$Element->{$HTML_NS}{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record, per URI-valued attribute, the kind of reference it makes.
    for my $pair ([cite => 'cite'],
                  [template => 'resource'],
                  [ref => 'resource']) {
      my ($attr_ln, $uri_type) = @$pair;
      $element_state->{uri_info}->{$attr_ln}->{type}->{$uri_type} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate.
    ## An empty |del| is only reported when the item is not flagged
    ## as transparent.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
}; # del
4128    
## |figure| element.
## NOTE: legend, Flow | Flow, legend?
##
## State kept in |$element_state|:
##   has_legend_at_first - a |legend| was the first content
##   has_legend          - the |legend| node seen after other content
##   has_non_legend      - non-|legend| content seen since the last |legend|
##   has_table           - number of |table| children
##   has_non_table       - set or counted for any other content
$Element->{$HTML_NS}{figure} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
      $element_state->{has_non_table} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_table} = 1;
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| exists already; this one is surplus.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:figure legend',
                           level => $self->{level}->{must});
      } elsif ($element_state->{has_legend}) {
        ## The previously remembered |legend| turns out not to be the
        ## last one, so it is the one reported.
        $self->{onerror}->(node => $element_state->{has_legend},
                           type => 'element not allowed:figure legend',
                           level => $self->{level}->{must});
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        $element_state->{has_legend} = $child_el;
      } else {
        $element_state->{has_legend_at_first} = 1;
      }
      delete $element_state->{has_non_legend};
    } else {
      my $is_table = ($child_nsuri eq $HTML_NS and $child_ln eq 'table');
      $element_state->{$is_table ? 'has_table' : 'has_non_table'}++;
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_legend} = 1;
      $element_state->{has_non_table}++;
    }

    $element_state->{in_figure} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |legend| must really be trailing: content that
    ## follows it makes the |legend| itself the offender.
    if (not $element_state->{has_legend_at_first} and
        $element_state->{has_legend} and
        $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    }

    ## When the only content is exactly one |table|, a recorded
    ## |table_caption_element| is reported at the "should" level.
    my $table_count = $element_state->{has_table} || 0;
    if ($table_count == 1 and
        not $element_state->{has_non_table} and
        $element_state->{table_caption_element}) {
      $self->{onerror}->(node => $element_state->{table_caption_element},
                         type => 'element not allowed',
                         level => $self->{level}->{should});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
}; # figure
4205 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
4206 wakaba 1.1
## Attribute checker for attributes whose validation is not implemented:
## reports the attribute as unknown at the "uncertain" level.
my $AttrCheckerNotImplemented = sub {
  my ($self, $attr) = @_;
  my @report = (
    node => $attr,
    type => 'unknown attribute',
    level => $self->{level}->{uncertain},
  );
  $self->{onerror}->(@report);
};
4213    
## |img| element: empty content model.  |alt| is expected (should),
## |src| is required (must), and |ismap| is only allowed while the
## |in_a_href| flag is set.
$Element->{$HTML_NS}{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $img_el = $item->{node};

    my $check_known_attrs = $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      alt => sub { }, ## NOTE: No syntactical requirement
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        unless ($self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{level}->{must});
        }
        ## The ordinary boolean-attribute syntax check runs in any case.
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      longdesc => $HTMLURIAttrChecker,
      ## TODO: HTML4 |name|
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      src => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    });
    $check_known_attrs->($self, $item, $element_state);

    ## Attributes that have to be present, with the level used when
    ## they are absent.
    ## TODO: ... (the |alt| requirement is not fully implemented)
    for my $required ([alt => 'should'], [src => 'must']) {
      my ($attr_ln, $level_name) = @$required;
      next if $img_el->has_attribute_ns (undef, $attr_ln);
      $self->{onerror}->(node => $img_el,
                         type => 'attribute missing',
                         text => $attr_ln,
                         level => $self->{level}->{$level_name});
    }

    ## TODO: external resource check

    ## Record, per URI-valued attribute, the kind of reference it makes.
    for my $pair ([src => 'embedded'],
                  [lowsrc => 'embedded'],
                  [dynsrc => 'embedded'],
                  [longdesc => 'cite']) {
      my ($attr_ln, $uri_type) = @$pair;
      $element_state->{uri_info}->{$attr_ln}->{type}->{$uri_type} = 1;
    }
  },
}; # img
4282    
## |iframe| element: text content model.
##
## The boolean attribute defined by HTML5 is spelled |seamless|.  It
## used to be registered here as |seemless|, so the real attribute was
## treated as unknown while the misspelling was accepted.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    name => $HTMLBrowsingContextNameAttrChecker,
    sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
      'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
    }),
    seamless => $GetHTMLBooleanAttrChecker->('seamless'),
    src => $HTMLURIAttrChecker,
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sandbox => FEATURE_HTML5_WD,
    scrolling => FEATURE_M12N10_REC,
    seamless => FEATURE_HTML5_WD,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_REC,
    width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record, per URI-valued attribute, the kind of reference it makes.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
}; # iframe
4324    
## |embed| element: empty content model.  Its attributes are checked by
## hand rather than through |$GetHTMLAttrsChecker|, because any
## lowercase, non-|xml|-prefixed local attribute name is acceptable.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status of the attributes known for this element.  The table is
    ## the same for every attribute, so it is built once per call
    ## instead of once per attribute.
    my %known_attr_status = (
      %HTMLAttrStatus,
      height => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_WD,
      type => FEATURE_HTML5_WD,
      width => FEATURE_HTML5_LC,
    );

    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;

      ## NOTE(review): the lookup is by local name only, so it is also
      ## applied to namespaced attributes - confirm this is intended.
      my $status = $known_attr_status{$attr_ln};

      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } elsif ($attr_ln eq 'width' or $attr_ln eq 'height') {
          $checker = $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 });
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } elsif ($attr_ln !~ /^[Xx][Mm][Ll]/ and
                 $attr_ln !~ /[A-Z]/ and
                 $attr_ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
          $checker = $HTMLAttrChecker->{$attr_ln}
              || sub { }; ## NOTE: Any local attribute is ok.
          $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the per-namespace tables, whose '' entry serves
      ## as the default for a namespace.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    unless ($has_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{info});
      ## NOTE: <embed> without src="" is allowed since revision 1929.
      ## We issue an informational message since <embed> w/o src=""
      ## is likely an authoring error.
    }

    ## TODO: external resource check

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
}; # embed
4402    
4403 wakaba 1.49 ## TODO:
4404     ## {applet} FEATURE_M12N10_REC_DEPRECATED
4405     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4406    
## |object| element.
## NOTE: param*, transparent (Flow)
##
## |$element_state->{has_non_param}| is set once anything other than a
## |param| child has been seen; a |param| that comes after that point
## is reported as misplaced.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      archive => $HTMLSpaceURIsAttrChecker,
          ## TODO: Relative to @codebase
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      classid => $HTMLURIAttrChecker,
      codebase => $HTMLURIAttrChecker,
      codetype => $HTMLIMTAttrChecker,
          ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
      data => $HTMLURIAttrChecker,
      declare => $GetHTMLBooleanAttrChecker->('declare'),
          ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
          ## [HTML4] but we don't know how to test this.
      form => $HTMLFormAttrChecker,
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name => $HTMLBrowsingContextNameAttrChecker,
          ## NOTE: |name| attribute of the |object| element defines
          ## the name of the browsing context created by the element,
          ## if any, but is also used as the form control name of the
          ## form control provided by the plugin, if any.
      standby => sub {}, ## NOTE: %Text; in HTML4
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      'content-length' => FEATURE_XHTML2_ED,
      data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      form => FEATURE_HTML5_LC,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_REC,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## At least one of |data| and |type| has to be specified.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type',
                           level => $self->{level}->{must});
      }
    }

    ## Record, per URI-valued attribute, the kind of reference it makes.
    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      ## A disallowed child is still non-|param| content.  (This used
      ## to set |has_non_legend|, a flag that belongs to |figure| and
      ## is never read by the |object| checker.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| elements are only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
}; # object
4524 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4525     ## What about |<section><object data><style scoped></style>x</object></section>|?
4526     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4527 wakaba 1.1
## |param| element definition.  It starts from the entries of
## |%HTMLEmptyChecker| and overrides |status| and |check_attrs|.
## |check_attrs| first runs the generic attribute checker built by
## |$GetHTMLAttrsChecker| and then reports an 'attribute missing' error
## (must level) for each of |name| and |value| that is not specified.
4528     $Element->{$HTML_NS}->{param} = {
4529 wakaba 1.40 %HTMLEmptyChecker,
4530 wakaba 1.187 status => FEATURE_HTML5_REC,
4531 wakaba 1.40 check_attrs => sub {
4532     my ($self, $item, $element_state) = @_;
         ## First hash: per-attribute value checkers.  Second hash:
         ## per-attribute feature status flags.
4533 wakaba 1.1 $GetHTMLAttrsChecker->({
         ## |name| and |value| accept any value (empty checker).
4534     name => sub { },
4535 wakaba 1.70 type => $HTMLIMTAttrChecker,
4536 wakaba 1.1 value => sub { },
4537 wakaba 1.70 valuetype => $GetHTMLEnumeratedAttrChecker->({
4538     data => 1, ref => 1, object => 1,
4539     }),
4540 wakaba 1.49 }, {
4541     %HTMLAttrStatus,
4542 wakaba 1.154 href => FEATURE_RDFA_REC,
4543 wakaba 1.187 id => FEATURE_HTML5_REC,
4544 wakaba 1.153 name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4545 wakaba 1.82 type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4546 wakaba 1.153 value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4547 wakaba 1.82 valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4548 wakaba 1.66 })->(@_);
         ## Both |name| and |value| are required.
4549 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'name')) {
4550     $self->{onerror}->(node => $item->{node},
4551 wakaba 1.104 type => 'attribute missing',
4552     text => 'name',
4553     level => $self->{level}->{must});
4554 wakaba 1.1 }
4555 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'value')) {
4556     $self->{onerror}->(node => $item->{node},
4557 wakaba 1.104 type => 'attribute missing',
4558     text => 'value',
4559     level => $self->{level}->{must});
4560 wakaba 1.1 }
4561     },
4562     };
4563    
## |video| element definition, based on |%HTMLTransparentChecker|.
##
## Two entries of |$element_state| track the |source| children:
##   - |allow_source|: true while a |source| child is still acceptable,
##     i.e. the element has no |src| attribute and no other content
##     (element or significant text) has been seen yet.
##   - |has_source|: |1| once a |source| child has been seen; |-1| when
##     |source| children are allowed but none has been seen so far.
## |check_end| reports 'child element missing' when |has_source| is
## still |-1|, and then delegates to the |check_end| of |object|.
4564     $Element->{$HTML_NS}->{video} = {
4565 wakaba 1.40 %HTMLTransparentChecker,
4566 wakaba 1.48 status => FEATURE_HTML5_LC,
4567 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4568 wakaba 1.1 src => $HTMLURIAttrChecker,
4569     ## TODO: start, loopstart, loopend, end
4570     ## ISSUE: they MUST be "value time offset"s. Value?
4571 wakaba 1.11 ## ISSUE: playcount has no conformance criteria
4572 wakaba 1.183 autobuffer => $GetHTMLBooleanAttrChecker->('autobuffer'),
4573 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
4574     controls => $GetHTMLBooleanAttrChecker->('controls'),
4575 wakaba 1.59 poster => $HTMLURIAttrChecker,
4576 wakaba 1.178 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4577     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4578 wakaba 1.50 }, {
4579     %HTMLAttrStatus,
4580 wakaba 1.183 autobuffer => FEATURE_HTML5_LC,
4581 wakaba 1.50 autoplay => FEATURE_HTML5_LC,
4582     controls => FEATURE_HTML5_LC,
4583 wakaba 1.153 end => FEATURE_HTML5_AT_RISK,
4584 wakaba 1.50 height => FEATURE_HTML5_LC,
4585 wakaba 1.153 loopend => FEATURE_HTML5_AT_RISK,
4586     loopstart => FEATURE_HTML5_AT_RISK,
4587     playcount => FEATURE_HTML5_AT_RISK,
4588 wakaba 1.50 poster => FEATURE_HTML5_LC,
4589     src => FEATURE_HTML5_LC,
4590 wakaba 1.153 start => FEATURE_HTML5_AT_RISK,
4591 wakaba 1.50 width => FEATURE_HTML5_LC,
4592 wakaba 1.1 }),
4593 wakaba 1.42 check_start => sub {
4594     my ($self, $item, $element_state) = @_;
         ## |source| children are acceptable only when there is no |src|
         ## attribute.
4595     $element_state->{allow_source}
4596     = not $item->{node}->has_attribute_ns (undef, 'src');
         ## Unless |has_source| is already true, it becomes |-1| when
         ## |source| is allowed (a |source| child is then expected) and a
         ## false value otherwise.
4597     $element_state->{has_source} ||= $element_state->{allow_source} * -1;
4598     ## NOTE: It might be set true by |check_element|.
4599 wakaba 1.66
4600     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4601     $element_state->{uri_info}->{poster}->{type}->{embedded} = 1;
4602 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4603     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4604 wakaba 1.42 },
4605     check_child_element => sub {
4606     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4607     $child_is_transparent, $element_state) = @_;
4608 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
4609     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
         ## Excluded element: report it.  It also ends the run of
         ## leading |source| children.
4610 wakaba 1.42 $self->{onerror}->(node => $child_el,
4611     type => 'element not allowed:minus',
4612 wakaba 1.104 level => $self->{level}->{must});
4613 wakaba 1.42 delete $element_state->{allow_source};
4614     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4615     #
4616     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
         ## A |source| child is an error once |allow_source| has been
         ## cleared, but it is still recorded in |has_source|.
4617 wakaba 1.45 unless ($element_state->{allow_source}) {
4618 wakaba 1.104 $self->{onerror}->(node => $child_el,
4619 wakaba 1.72 type => 'element not allowed:flow',
4620 wakaba 1.104 level => $self->{level}->{must});
4621 wakaba 1.42 }
4622 wakaba 1.45 $element_state->{has_source} = 1;
4623 wakaba 1.1 } else {
         ## Any other child ends the run of |source| children and is
         ## checked by the flow content checker.
4624 wakaba 1.42 delete $element_state->{allow_source};
4625 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
4626 wakaba 1.42 }
4627     },
4628     check_child_text => sub {
4629     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
         ## Significant text also ends the run of |source| children.
4630     if ($has_significant) {
4631     delete $element_state->{allow_source};
4632     }
4633 wakaba 1.72 $HTMLFlowContentChecker{check_child_text}->(@_);
4634 wakaba 1.42 },
4635     check_end => sub {
4636     my ($self, $item, $element_state) = @_;
         ## |-1| means that |source| was allowed (no |src| attribute) but
         ## no |source| child has been seen.
4637     if ($element_state->{has_source} == -1) {
4638     $self->{onerror}->(node => $item->{node},
4639 wakaba 1.104 type => 'child element missing',
4640     text => 'source',
4641     level => $self->{level}->{must});
4642 wakaba 1.1 }
4643 wakaba 1.42
         ## The remaining end-of-element checks are shared with |object|.
4644     $Element->{$HTML_NS}->{object}->{check_end}->(@_);
4645 wakaba 1.1 },
4646     };
4647    
## |audio| element definition.  It copies every entry of the |video|
## definition (including its child element, text and end checks) and
## overrides only |status| and |check_attrs|.  The attribute tables here
## do not list |poster|, |height| or |width|.
4648     $Element->{$HTML_NS}->{audio} = {
4649 wakaba 1.40 %{$Element->{$HTML_NS}->{video}},
4650 wakaba 1.48 status => FEATURE_HTML5_LC,
4651 wakaba 1.42 check_attrs => $GetHTMLAttrsChecker->({
4652     src => $HTMLURIAttrChecker,
4653     ## TODO: start, loopstart, loopend, end
4654     ## ISSUE: they MUST be "value time offset"s. Value?
4655     ## ISSUE: playcount has no conformance criteria
4656 wakaba 1.183 autobuffer => $GetHTMLBooleanAttrChecker->('autobuffer'),
4657 wakaba 1.42 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
4658     controls => $GetHTMLBooleanAttrChecker->('controls'),
4659 wakaba 1.50 }, {
4660     %HTMLAttrStatus,
4661 wakaba 1.183 autobuffer => FEATURE_HTML5_LC,
4662 wakaba 1.50 autoplay => FEATURE_HTML5_LC,
4663     controls => FEATURE_HTML5_LC,
4664 wakaba 1.153 end => FEATURE_HTML5_AT_RISK,
4665     loopend => FEATURE_HTML5_AT_RISK,
4666     loopstart => FEATURE_HTML5_AT_RISK,
4667     playcount => FEATURE_HTML5_AT_RISK,
4668 wakaba 1.50 src => FEATURE_HTML5_LC,
4669 wakaba 1.153 start => FEATURE_HTML5_AT_RISK,
4670 wakaba 1.42 }),
4671 wakaba 1.1 };
4672    
## |source| element definition, based on |%HTMLEmptyChecker|.
## |check_attrs| runs the generic attribute checker, reports an
## 'attribute missing' error (must level) when |src| is not specified,
## and marks the |src| URL as an embedded resource in |uri_info|.
4673     $Element->{$HTML_NS}->{source} = {
4674 wakaba 1.40 %HTMLEmptyChecker,
4675 wakaba 1.153 status => FEATURE_HTML5_LC,
4676 wakaba 1.40 check_attrs => sub {
4677     my ($self, $item, $element_state) = @_;
4678 wakaba 1.1 $GetHTMLAttrsChecker->({
4679 wakaba 1.90 media => $HTMLMQAttrChecker,
4680     pixelratio => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), ## ISSUE: Negative or zero pixelratio=""
4681     src => $HTMLURIAttrChecker,
4682 wakaba 1.1 type => $HTMLIMTAttrChecker,
4683 wakaba 1.50 }, {
4684     %HTMLAttrStatus,
4685 wakaba 1.153 media => FEATURE_HTML5_LC,
4686     pixelratio => FEATURE_HTML5_LC,
4687     src => FEATURE_HTML5_LC,
4688     type => FEATURE_HTML5_LC,
4689 wakaba 1.66 })->(@_);
         ## |src| is required.
4690 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'src')) {
4691     $self->{onerror}->(node => $item->{node},
4692 wakaba 1.104 type => 'attribute missing',
4693     text => 'src',
4694     level => $self->{level}->{must});
4695 wakaba 1.1 }
4696 wakaba 1.66
4697     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4698 wakaba 1.1 },
4699     };
4700    
## |canvas| element definition, based on |%HTMLTransparentChecker|.
## Only the |height| and |width| attributes get element-specific
## checkers; both use the non-negative integer checker with a predicate
## that accepts every value.
4701     $Element->{$HTML_NS}->{canvas} = {
4702 wakaba 1.40 %HTMLTransparentChecker,
4703 wakaba 1.187 status => FEATURE_HTML5_REC,
4704 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4705 wakaba 1.1 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4706     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4707 wakaba 1.50 }, {
4708     %HTMLAttrStatus,
4709 wakaba 1.187 height => FEATURE_HTML5_REC,
4710     width => FEATURE_HTML5_REC,
4711 wakaba 1.1 }),
4712 wakaba 1.178
4713     # Authors MUST provide alternative content (HTML5 revision 2868) -
4714     # This requirement cannot be checked, since the alternative content
4715     # might be placed outside of the element.
4716     }; # canvas
4717 wakaba 1.1
## |map| element definition, based on |%HTMLFlowContentChecker|.
##
##   - |check_attrs| requires a non-empty |name| attribute, registers it
##     in |$self->{map}| (the first attribute node seen for a given name
##     is kept) and, when an |id| attribute is also present, requires it
##     to be equal to |name|.
##   - |check_start| / |check_end| maintain |$self->{flag}->{in_map}|,
##     the stack of per-|map| hashes filled in by the |area| checker,
##     and report the |area| elements left with an empty |alt=""|.
4718     $Element->{$HTML_NS}->{map} = {
4719 wakaba 1.72 %HTMLFlowContentChecker,
4720 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4721 wakaba 1.40 check_attrs => sub {
4722     my ($self, $item, $element_state) = @_;
         ## Set by the |name| checker below to |[$value, $attr]| when a
         ## |name| attribute is processed.
4723 wakaba 1.100 my $has_name;
4724 wakaba 1.4 $GetHTMLAttrsChecker->({
4725 wakaba 1.100 name => sub {
4726     my ($self, $attr) = @_;
4727     my $value = $attr->value;
4728     if (length $value) {
4729     ## NOTE: Duplication is not non-conforming.
4730     ## NOTE: Space characters are not non-conforming.
4731     #
4732     } else {
4733     $self->{onerror}->(node => $attr,
4734     type => 'empty attribute value',
4735 wakaba 1.104 level => $self->{level}->{must});
4736 wakaba 1.100 }
         ## The name is registered even when it is empty.
4737 wakaba 1.4 $self->{map}->{$value} ||= $attr;
4738 wakaba 1.100 $has_name = [$value, $attr];
4739 wakaba 1.4 },
4740 wakaba 1.49 }, {
4741     %HTMLAttrStatus,
4742 wakaba 1.187 class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4743     dir => FEATURE_HTML5_REC,
4744     id => FEATURE_HTML5_REC,
4745     lang => FEATURE_HTML5_REC,
4746 wakaba 1.153 #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
4747     name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4748 wakaba 1.50 onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4749     ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4750     onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4751     onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4752     onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4753     onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4754     onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4755     onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4756     onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4757     onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4758 wakaba 1.187 title => FEATURE_HTML5_REC,
4759 wakaba 1.66 })->(@_);
4760 wakaba 1.100
         ## With a |name|: an |id|, if present, has to be the same string
         ## (the error is reported on the |id| attribute node).  Without
         ## a |name|: the attribute is reported as missing.
4761 wakaba 1.135 if ($has_name) {
4762 wakaba 1.145 my $id = $item->{node}->get_attribute_ns (undef, 'id');
4763 wakaba 1.135 if (defined $id and $has_name->[0] ne $id) {
4764 wakaba 1.155 $self->{onerror}
4765     ->(node => $item->{node}->get_attribute_node_ns (undef, 'id'),
4766     type => 'id ne name',
4767     level => $self->{level}->{must});
4768 wakaba 1.100 }
4769 wakaba 1.135 } else {
4770 wakaba 1.100 $self->{onerror}->(node => $item->{node},
4771 wakaba 1.104 type => 'attribute missing',
4772     text => 'name',
4773     level => $self->{level}->{must});
4774 wakaba 1.100 }
4775 wakaba 1.4 },
4776 wakaba 1.59 check_start => sub {
4777     my ($self, $item, $element_state) = @_;
         ## Remember the outer stack so that |check_end| can restore it,
         ## then install a copy of it with a fresh hash appended for this
         ## |map| element.
4778     $element_state->{in_map_original} = $self->{flag}->{in_map};
4779 wakaba 1.137 $self->{flag}->{in_map} = [@{$self->{flag}->{in_map} or []}, {}];
4780     ## NOTE: |{in_map}| is a reference to the array which contains
4781     ## hash references. Hashes are corresponding to the opening
4782     ## |map| elements and each of them contains the key-value
4783     ## pairs corresponding to the absolute URLs for the processed
4784     ## |area| elements in the |map| element corresponding to the
4785     ## hash. The key represents the resource (## TODO: use
4786     ## absolute URL), while the value represents whether there is
4787     ## an |area| element whose |alt| attribute is specified to a
4788     ## non-empty value. If there IS such an |area| element for
4789     ## the resource specified by the key, then the value is set to
4790     ## zero (|0|). Otherwise, if there is no such an |area|
4791     ## element but there is any |area| element with the empty
4792     ## |alt=""| attribute, then the value contains an array
4793     ## reference that contains all of such |area| elements.
4794 wakaba 1.79
4795     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4796     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4797 wakaba 1.59 },
4798     check_end => sub {
4799     my ($self, $item, $element_state) = @_;
4800 wakaba 1.137
         ## Entries of the innermost hash that still hold an array
         ## reference list the nodes recorded for an empty |alt=""| whose
         ## URL never got a non-empty |alt|; each of them is reported.
         ## Entries set to |0| are skipped.
4801     for (keys %{$self->{flag}->{in_map}->[-1]}) {
4802     my $nodes = $self->{flag}->{in_map}->[-1]->{$_};
4803     next unless $nodes;
4804     for (@$nodes) {
4805     $self->{onerror}->(type => 'empty area alt',
4806     node => $_,
4807     level => $self->{level}->{html5_no_may});
4808     }
4809     }
4810    
         ## Restore the stack saved by |check_start|.
4811     $self->{flag}->{in_map} = $element_state->{in_map_original};
4812    
4813 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
4814 wakaba 1.59 },
4815 wakaba 1.1 };
4816    
## |area| element definition, based on |%HTMLEmptyChecker|.
##
## |check_attrs| does not use |$GetHTMLAttrsChecker|; it walks the
## attributes itself because several checks depend on combinations of
## attributes:
##   1. Each attribute is checked individually.  Attributes that have a
##      checker in the local table below are also remembered in |%attr|,
##      and a syntactically valid |coords| value is split into |$coords|.
##   2. With |href|: |alt| is required and its (non-)emptiness is
##      recorded in the |in_map| hashes of the open |map| elements.
##      Without |href|: |target|, |ping|, |rel|, |media|, |hreflang|,
##      |type| and |alt| are not allowed.
##   3. The number and range of the |coords| values are checked against
##      the shape selected by |shape| (rectangle when not specified).
##
## |check_start| reports an |area| element that is not inside a |map|
## element, unless the |area| element has no parent element.
4817     $Element->{$HTML_NS}->{area} = {
4818 wakaba 1.40 %HTMLEmptyChecker,
4819 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4820 wakaba 1.40 check_attrs => sub {
4821     my ($self, $item, $element_state) = @_;
         ## |%attr|: attribute nodes, keyed by local name, for attributes
         ## that have a checker in the local table.  |$coords|: array
         ## reference of the numbers in a well-formed |coords| value;
         ## stays undefined otherwise.
4822 wakaba 1.1 my %attr;
4823     my $coords;
4824 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
4825 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
4826     $attr_ns = '' unless defined $attr_ns;
4827     my $attr_ln = $attr->manakai_local_name;
4828     my $checker;
4829 wakaba 1.73 my $status;
4830 wakaba 1.1 if ($attr_ns eq '') {
         ## Feature status of the attribute (null namespace only).
4831 wakaba 1.73 $status = {
4832     %HTMLAttrStatus,
4833     %HTMLM12NCommonAttrStatus,
4834 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
4835 wakaba 1.153 alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4836     coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4837 wakaba 1.154 href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
4838 wakaba 1.153 hreflang => FEATURE_HTML5_WD,
4839 wakaba 1.187 lang => FEATURE_HTML5_REC,
4840 wakaba 1.154 media => FEATURE_HTML5_WD,
4841 wakaba 1.73 nohref => FEATURE_M12N10_REC,
4842     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4843     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4844 wakaba 1.153 ping => FEATURE_HTML5_WD,
4845 wakaba 1.154 rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
4846 wakaba 1.153 shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4847 wakaba 1.73 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4848 wakaba 1.153 target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4849     type => FEATURE_HTML5_WD,
4850 wakaba 1.73 }->{$attr_ln};
4851    
         ## Element-specific value checkers.
4852 wakaba 1.1 $checker = {
4853 wakaba 1.153 alt => sub {
4854     ## NOTE: Checked later.
4855     },
4856 wakaba 1.1 shape => $GetHTMLEnumeratedAttrChecker->({
4857     circ => -1, circle => 1,
4858     default => 1,
4859     poly => 1, polygon => -1,
4860     rect => 1, rectangle => -1,
4861     }),
4862     coords => sub {
4863     my ($self, $attr) = @_;
4864     my $value = $attr->value;
         ## Comma-separated list of optionally negative decimal
         ## integers, without any white space.
4865     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
4866     $coords = [split /,/, $value];
4867     } else {
4868     $self->{onerror}->(node => $attr,
4869 wakaba 1.104 type => 'coords:syntax error',
4870     level => $self->{level}->{must});
4871 wakaba 1.1 }
4872     },
4873 wakaba 1.70 nohref => $GetHTMLBooleanAttrChecker->('nohref'),
4874     target => $HTMLTargetAttrChecker,
4875 wakaba 1.1 href => $HTMLURIAttrChecker,
4876     ping => $HTMLSpaceURIsAttrChecker,
4877 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
4878 wakaba 1.1 media => $HTMLMQAttrChecker,
4879     hreflang => $HTMLLanguageTagAttrChecker,
4880     type => $HTMLIMTAttrChecker,
4881     }->{$attr_ln};
         ## Fallbacks: |data-*| attributes without uppercase letters,
         ## then the checkers in |$HTMLAttrChecker|.
4882     if ($checker) {
4883     $attr{$attr_ln} = $attr;
4884 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
4885     $attr_ln !~ /[A-Z]/) {
4886 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
4887     $status = $HTMLDatasetAttrStatus;
4888 wakaba 1.1 } else {
4889     $checker = $HTMLAttrChecker->{$attr_ln};
4890     }
4891     }
         ## Last resort: the per-namespace tables, then the per-namespace
         ## default entry (keyed by the empty string).
4892     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
4893 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
4894     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
4895     || $AttrStatus->{$attr_ns}->{''};
4896     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
4897 wakaba 1.62
         ## A checker that is not a reference is treated as "known, but
         ## nothing to check".  A null-namespace attribute with neither
         ## checker nor status is silently skipped here.
4898 wakaba 1.1 if ($checker) {
4899 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
4900 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
4901 wakaba 1.54 #
4902 wakaba 1.1 } else {
4903 wakaba 1.104 $self->{onerror}->(node => $attr,
4904     type => 'unknown attribute',
4905     level => $self->{level}->{uncertain});
4906 wakaba 1.1 ## ISSUE: No conformance criteria for unknown attributes in the spec
4907     }
4908 wakaba 1.49
4909 wakaba 1.82 $self->_attr_status_info ($attr, $status);
4910 wakaba 1.1 }
4911    
         ## Checks that depend on whether |href| is specified.
4912     if (defined $attr{href}) {
4913 wakaba 1.4 $self->{has_hyperlink_element} = 1;
4914 wakaba 1.137 if (defined $attr{alt}) {
4915     my $url = $attr{href}->value; ## TODO: resolve
4916     if (length $attr{alt}->value) {
         ## Non-empty |alt|: mark the URL as satisfied (|0|) in the
         ## hash of every open |map| element.
4917     for (@{$self->{flag}->{in_map} or []}) {
4918     $_->{$url} = 0;
4919     }
4920     } else {
4921     ## NOTE: Empty |alt=""|. If there is another |area| element
4922     ## with the same |href=""| and that |area| element's
4923     ## |alt=""| attribute is not an empty string, then this
4924     ## is conforming.
         ## The |alt| attribute node is queued unless the URL has
         ## already been marked as satisfied (|0|).
4925     for (@{$self->{flag}->{in_map} or []}) {
4926     push @{$_->{$url} ||= []}, $attr{alt}
4927     unless exists $_->{$url} and not $_->{$url};
4928     }
4929     }
4930     } else {
4931 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4932 wakaba 1.104 type => 'attribute missing',
4933     text => 'alt',
4934     level => $self->{level}->{must});
4935 wakaba 1.1 }
4936     } else {
         ## Without |href| these attributes are not allowed.
4937     for (qw/target ping rel media hreflang type alt/) {
4938     if (defined $attr{$_}) {
4939     $self->{onerror}->(node => $attr{$_},
4940 wakaba 1.104 type => 'attribute not allowed',
4941     level => $self->{level}->{must});
4942 wakaba 1.1 }
4943     }
4944     }
4945    
         ## Normalize the |shape| keyword (and its synonyms) to one of
         ## 'circle', 'default', 'polygon' or 'rectangle'.  A missing or
         ## unrecognized value is treated as 'rectangle'.
4946     my $shape = 'rectangle';
4947     if (defined $attr{shape}) {
4948     $shape = {
4949     circ => 'circle', circle => 'circle',
4950     default => 'default',
4951     poly => 'polygon', polygon => 'polygon',
4952     rect => 'rectangle', rectangle => 'rectangle',
4953     }->{lc $attr{shape}->value} || 'rectangle';
4954     ## TODO: ASCII lowercase?
4955     }
4956    
         ## Per-shape |coords| checks.  When |$coords| is undefined
         ## although the attribute exists, the syntax error has already
         ## been reported by the |coords| checker above.
4957     if ($shape eq 'circle') {
         ## Exactly three numbers; the third one must not be negative.
4958     if (defined $attr{coords}) {
4959     if (defined $coords) {
4960     if (@$coords == 3) {
4961     if ($coords->[2] < 0) {
4962     $self->{onerror}->(node => $attr{coords},
4963 wakaba 1.104 type => 'coords:out of range',
4964     index => 2,
4965     value => $coords->[2],
4966     level => $self->{level}->{must});
4967 wakaba 1.1 }
4968     } else {
4969     $self->{onerror}->(node => $attr{coords},
4970 wakaba 1.104 type => 'coords:number not 3',
4971     text => 0+@$coords,
4972     level => $self->{level}->{must});
4973 wakaba 1.1 }
4974     } else {
4975     ## NOTE: A syntax error has been reported.
4976     }
4977     } else {
4978 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4979 wakaba 1.104 type => 'attribute missing',
4980     text => 'coords',
4981     level => $self->{level}->{must});
4982 wakaba 1.1 }
4983     } elsif ($shape eq 'default') {
         ## |coords| is not allowed at all.
4984     if (defined $attr{coords}) {
4985     $self->{onerror}->(node => $attr{coords},
4986 wakaba 1.104 type => 'attribute not allowed',
4987     level => $self->{level}->{must});
4988 wakaba 1.1 }
4989     } elsif ($shape eq 'polygon') {
         ## At least six numbers, and an even number of them.
4990     if (defined $attr{coords}) {
4991     if (defined $coords) {
4992     if (@$coords >= 6) {
4993     unless (@$coords % 2 == 0) {
4994     $self->{onerror}->(node => $attr{coords},
4995 wakaba 1.104 type => 'coords:number not even',
4996     text => 0+@$coords,
4997     level => $self->{level}->{must});
4998 wakaba 1.1 }
4999     } else {
5000     $self->{onerror}->(node => $attr{coords},
5001 wakaba 1.104 type => 'coords:number lt 6',
5002     text => 0+@$coords,
5003     level => $self->{level}->{must});
5004 wakaba 1.1 }
5005     } else {
5006     ## NOTE: A syntax error has been reported.
5007     }
5008     } else {
5009 wakaba 1.40 $self->{onerror}->(node => $item->{node},
5010 wakaba 1.104 type => 'attribute missing',
5011     text => 'coords',
5012     level => $self->{level}->{must});
5013 wakaba 1.1 }
5014     } elsif ($shape eq 'rectangle') {
         ## Exactly four numbers; the first must be less than the third
         ## and the second must be less than the fourth.
5015     if (defined $attr{coords}) {
5016     if (defined $coords) {
5017     if (@$coords == 4) {
5018     unless ($coords->[0] < $coords->[2]) {
5019     $self->{onerror}->(node => $attr{coords},
5020 wakaba 1.104 type => 'coords:out of range',
5021     index => 0,
5022     value => $coords->[0],
5023     level => $self->{level}->{must});
5024 wakaba 1.1 }
5025     unless ($coords->[1] < $coords->[3]) {
5026     $self->{onerror}->(node => $attr{coords},
5027 wakaba 1.104 type => 'coords:out of range',
5028     index => 1,
5029     value => $coords->[1],
5030     level => $self->{level}->{must});
5031 wakaba 1.1 }
5032     } else {
5033     $self->{onerror}->(node => $attr{coords},
5034 wakaba 1.104 type => 'coords:number not 4',
5035     text => 0+@$coords,
5036     level => $self->{level}->{must});
5037 wakaba 1.1 }
5038     } else {
5039     ## NOTE: A syntax error has been reported.
5040     }
5041     } else {
5042 wakaba 1.40 $self->{onerror}->(node => $item->{node},
5043 wakaba 1.104 type => 'attribute missing',
5044     text => 'coords',
5045     level => $self->{level}->{must});
5046 wakaba 1.1 }
5047     }
5048 wakaba 1.66
5049     $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
5050 wakaba 1.1 },
5051 wakaba 1.59 check_start => sub {
5052     my ($self, $item, $element_state) = @_;
         ## An |area| element is accepted when some |map| element is open
         ## (|in_map| is set) or when it has no parent element.
5053     unless ($self->{flag}->{in_map} or
5054     not $item->{node}->manakai_parent_element) {
5055     $self->{onerror}->(node => $item->{node},
5056     type => 'element not allowed:area',
5057 wakaba 1.104 level => $self->{level}->{must});
5058 wakaba 1.59 }
5059 wakaba 1.79
5060     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
5061     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
5062 wakaba 1.59 },
5063 wakaba 1.1 };
5064    
## |table| element definition, based on |%HTMLChecker|.
##
## Child elements are validated by a small state machine whose state is
## kept in |$element_state->{phase}|:
##
##   'before caption'      - nothing seen yet; |caption| is allowed
##   'in colgroup'         - after |caption| and/or |colgroup|s
##   'after thead'         - after |thead|
##   'after leading tfoot' - after a |tfoot| that precedes the body
##   'in tbodys'           - in a sequence of |tbody| elements
##   'in trs'              - in a sequence of |tr| elements
##   'after tfoot'         - after a trailing |tfoot|; nothing may follow
##
## |$element_state->{has_tfoot}| is set once a |tfoot| child has been
## accepted, so that only one |tfoot| is allowed in total.
##
## NOTE: A |tfoot| placed before the body used to switch directly to
## the 'in tbodys' phase, which made a following |tr| child (as in
## |<table><tfoot/><tr/></table>|) an error although the content model
## allows either |tbody| elements or |tr| elements after the |tfoot|.
## The 'after leading tfoot' phase accepts both.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cellpadding => $HTMLLengthAttrChecker,
    cellspacing => $HTMLLengthAttrChecker,
    frame => $GetHTMLEnumeratedAttrChecker->({
      void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
      lhs => 1, rhs => 1, box => 1, border => 1,
    }),
    rules => $GetHTMLEnumeratedAttrChecker->({
      none => 1, groups => 1, rows => 1, cols => 1, all => 1,
    }),
    summary => sub {}, ## NOTE: %Text; in HTML4.
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    cols => FEATURE_RFC1942,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';

    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{phase} eq 'in tbodys') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        #$element_state->{phase} = 'in tbodys';
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'in trs') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        #$element_state->{phase} = 'in trs';
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'after leading tfoot') {
      ## The |tfoot| has been seen before any body row: the body may be
      ## made of |tbody| elements or of |tr| elements.  Another |tfoot|
      ## is not allowed (|has_tfoot| is already set).
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'after thead') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after leading tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'in colgroup') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
        $element_state->{phase} = 'after thead';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after leading tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'before caption') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
        ## The |caption| element is made available to the parent's state.
        $item->{parent_state}->{table_caption_element} = $child_el;
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
        $element_state->{phase} = 'after thead';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after leading tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($element_state->{phase} eq 'after tfoot') {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    } else {
      die "check_child_element: Bad |table| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text is not allowed directly in a |table| element.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    ## The table is formed and its header cells are assigned by
    ## |Whatpm::HTMLTable|; errors are forwarded to |onerror|.  The
    ## resulting table is appended to |$self->{return}->{table}|.
    require Whatpm::HTMLTable;
    my $table = Whatpm::HTMLTable->form_table ($item->{node}, sub {
      $self->{onerror}->(@_);
    }, $self->{level});
    Whatpm::HTMLTable->assign_header
        ($table, $self->{onerror}, $self->{level});
    push @{$self->{return}->{table}}, $table;

    $HTMLChecker{check_end}->(@_);
  },
};
5215    
## |caption| element definition, based on |%HTMLFlowContentChecker|.
## While a |caption| element is open, the HTML |table| element is
## registered as a minus (excluded) element; |check_end| undoes that
## registration.  Both callbacks then delegate to the flow content
## checker.
5216     $Element->{$HTML_NS}->{caption} = {
5217 wakaba 1.169 %HTMLFlowContentChecker,
5218 wakaba 1.187 status => FEATURE_HTML5_REC,
5219 wakaba 1.69 check_attrs => $GetHTMLAttrsChecker->({
5220     align => $GetHTMLEnumeratedAttrChecker->({
5221     top => 1, bottom => 1, left => 1, right => 1,
5222     }),
5223     }, {
5224 wakaba 1.49 %HTMLAttrStatus,
5225 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
5226 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
5227 wakaba 1.187 lang => FEATURE_HTML5_REC,
5228 wakaba 1.49 }),
5229 wakaba 1.169 check_start => sub {
5230     my ($self, $item, $element_state) = @_;
         ## Exclude |table| for the duration of this element.
5231     $self->_add_minus_elements ($element_state, {$HTML_NS => {table => 1}});
5232    
5233     $HTMLFlowContentChecker{check_start}->(@_);
5234     },
5235     check_end => sub {
5236     my ($self, $item, $element_state) = @_;
         ## Undo the exclusion registered by |check_start|.
5237     $self->_remove_minus_elements ($element_state);
5238    
5239     $HTMLFlowContentChecker{check_end}->(@_);
5240     },
5241     }; # caption
5242 wakaba 1.1
## Attribute checkers for the HTML4 cell alignment attributes (|align|,
## |char|, |charoff| and |valign|).  The hash is spliced into the
## attribute checker tables of the elements that take these attributes.
5243 wakaba 1.69 my %cellalign = (
5244     ## HTML4 %cellhalign;
5245 wakaba 1.70 align => $GetHTMLEnumeratedAttrChecker->({
5246     left => 1, center => 1, right => 1, justify => 1, char => 1,
5247     }),
5248     char => sub {
5249     my ($self, $attr) = @_;
5250 wakaba 1.69
5251 wakaba 1.70 ## NOTE: "character" or |%Character;| in HTML4.
5252    
         ## The value has to be exactly one character long.
5253     my $value = $attr->value;
5254     if (length $value != 1) {
5255     $self->{onerror}->(node => $attr, type => 'char:syntax error',
5256 wakaba 1.105 level => $self->{level}->{html4_fact});
5257 wakaba 1.70 }
5258     },
5259 wakaba 1.86 charoff => $HTMLLengthAttrChecker,
5260    
5261 wakaba 1.69 ## HTML4 %cellvalign;
5262 wakaba 1.70 valign => $GetHTMLEnumeratedAttrChecker->({
5263     top => 1, middle => 1, bottom => 1, baseline => 1,
5264     }),
5265 wakaba 1.69 );
5266    
## |colgroup| element.  Only |col| children (or elements currently
## registered in |plus_elements|) are accepted, and significant text
## is reported as an error.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    ## The callback accepts only values greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
    ## TODO: "attribute not supported" if |col|.
    ## ISSUE: MUST NOT if any |col|?
    ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_REC,
    span    => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    valign  => FEATURE_M12N10_REC,
    width   => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## The interactive-content test is consulted only when the child is
    ## listed in |minus_elements|.
    my $is_excluded
        = $self->{minus_elements}->{$child_nsuri}->{$child_ln}
          && $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln);

    if ($is_excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } else {
      ## "Plus" elements and HTML |col| elements are allowed as is.
      my $is_allowed
          = $self->{plus_elements}->{$child_nsuri}->{$child_ln}
            || ($child_nsuri eq $HTML_NS && $child_ln eq 'col');
      unless ($is_allowed) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
5313    
## |col| element.  Based on |%HTMLEmptyChecker|, with the cell
## alignment attribute checkers and a |span| checker added.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    ## The callback accepts only values greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_REC,
    span    => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    valign  => FEATURE_M12N10_REC,
    width   => FEATURE_M12N10_REC,
  }),
};
5332    
## |tbody| element.  Only |tr| children (or elements currently
## registered in |plus_elements|) are accepted, and significant text
## is reported as an error.  This definition is copied for |thead|
## and |tfoot| below.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_REC,
    valign  => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## The interactive-content test is consulted only when the child is
    ## listed in |minus_elements|.
    my $is_excluded
        = $self->{minus_elements}->{$child_nsuri}->{$child_ln}
          && $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln);

    if ($is_excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } else {
      ## "Plus" elements and HTML |tr| elements are allowed as is.
      my $is_allowed
          = $self->{plus_elements}->{$child_nsuri}->{$child_ln}
            || ($child_nsuri eq $HTML_NS && $child_ln eq 'tr');
      unless ($is_allowed) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
5372    
## |thead| element: a shallow copy of the |tbody| definition.
$Element->{$HTML_NS}{thead} = {
  %{ $Element->{$HTML_NS}{tbody} },
};
5376    
## |tfoot| element: a shallow copy of the |tbody| definition.
$Element->{$HTML_NS}{tfoot} = {
  %{ $Element->{$HTML_NS}{tbody} },
};
5380    
## |tr| element.  Only |td| and |th| children (or elements currently
## registered in |plus_elements|) are accepted, and significant text
## is reported as an error.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    bgcolor => $HTMLColorAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align   => FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang    => FEATURE_HTML5_REC,
    valign  => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## The interactive-content test is consulted only when the child is
    ## listed in |minus_elements|.
    my $is_excluded
        = $self->{minus_elements}->{$child_nsuri}->{$child_ln}
          && $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln);

    if ($is_excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } else {
      ## "Plus" elements and HTML |td|/|th| elements are allowed as is.
      my $is_allowed
          = $self->{plus_elements}->{$child_nsuri}->{$child_ln}
            || ($child_nsuri eq $HTML_NS
                && ($child_ln eq 'td' || $child_ln eq 'th'));
      unless ($is_allowed) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
5423    
## |td| element.  Content handling comes from
## |%HTMLFlowContentChecker|; only the attribute checkers and the
## attribute status table are element-specific.
$Element->{$HTML_NS}->{td} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    ## The span callbacks accept only values greater than zero.
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    headers => sub {
      ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
      ## Though that method does not check the |headers| attribute of a
      ## |td| element if the element does not form a table, in that case
      ## the |td| element is non-conforming anyway.
    },
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->({
      row      => 1,
      col      => 1,
      rowgroup => 1,
      colgroup => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    abbr    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    align   => FEATURE_M12N10_REC,
    axis    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    headers => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    height  => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_REC,
    nowrap  => FEATURE_M12N10_REC_DEPRECATED,
    rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    scope   => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    valign  => FEATURE_M12N10_REC,
    width   => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
5463    
## |th| element.  Content handling comes from
## |%HTMLPhrasingContentChecker|; only the attribute checkers and the
## attribute status table are element-specific.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    ## The span callbacks accept only values greater than zero.
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    ## TODO: HTML4(?) |headers|
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->({
      row      => 1,
      col      => 1,
      rowgroup => 1,
      colgroup => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    abbr    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    align   => FEATURE_M12N10_REC,
    axis    => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char    => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    headers => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    height  => FEATURE_M12N10_REC_DEPRECATED,
    lang    => FEATURE_HTML5_REC,
    nowrap  => FEATURE_M12N10_REC_DEPRECATED,
    rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    scope   => FEATURE_HTML5_REC,
    valign  => FEATURE_M12N10_REC,
    width   => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
5498    
## |form| element.  Flow content in which a nested |form| is
## registered as a "minus" element for the lifetime of the element.
$Element->{$HTML_NS}->{form} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $AcceptAttrChecker,
    'accept-charset' => $HTMLCharsetsAttrChecker,
    action => $HTMLURIAttrChecker, ## TODO: Warn if submission is not defined for the scheme
    autocomplete => $GetHTMLEnumeratedAttrChecker->({
      on  => 1,
      off => 1,
    }),
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    enctype => $GetHTMLEnumeratedAttrChecker->({
      'application/x-www-form-urlencoded' => 1,
      'multipart/form-data' => 1,
      'text/plain' => 1,
    }),
    method => $GetHTMLEnumeratedAttrChecker->({
      get    => 1,
      post   => 1,
      put    => 1,
      delete => 1,
    }),
    name => sub {
      my ($self, $attr) = @_;

      ## A form name must be non-empty and must not repeat a name
      ## already recorded in |$self->{form}|.
      my $form_name = $attr->value;
      if ($form_name eq '') {
        $self->{onerror}->(type => 'empty form name',
                           node => $attr,
                           level => $self->{level}->{must});
      } elsif ($self->{form}->{$form_name}) {
        $self->{onerror}->(type => 'duplicate form name',
                           node => $attr,
                           value => $form_name,
                           level => $self->{level}->{must});
      } else {
        ## First occurrence: remember the name for later duplicate checks.
        $self->{form}->{$form_name} = 1;
      }
    },
    novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
    ## TODO: Tests for following attrs:
    onformchange => $HTMLEventHandlerAttrChecker,
    onforminput => $HTMLEventHandlerAttrChecker,
    onreceived => $HTMLEventHandlerAttrChecker,
    replace => $GetHTMLEnumeratedAttrChecker->({
      document => 1,
      values   => 1,
    }),
    target => $HTMLTargetAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
    'accept-charset' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    autocomplete => FEATURE_HTML5_WD,
    data => FEATURE_WF2,
    enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    novalidate => FEATURE_HTML5_DEFAULT,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput => FEATURE_WF2_INFORMATIVE,
    onreceived => FEATURE_WF2,
    onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    replace => FEATURE_WF2,
    sdapref => FEATURE_HTML20_RFC,
    sdasuff => FEATURE_HTML20_RFC,
    target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register |form| as a "minus" element while inside this element.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {form => 1}});

    ## Record the URI type of each URI-valued attribute: |action| is an
    ## "action" URI, the others are "resource" URIs.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    for my $uri_attr_name (qw/data template ref/) {
      $element_state->{uri_info}->{$uri_attr_name}->{type}->{resource} = 1;
    }

    $element_state->{id_type} = 'form';
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the "minus" registration made by |check_start|, then run
    ## the generic flow content end-of-element check.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # form
5585 wakaba 1.52
## |fieldset| element.  Content model: a |legend| element followed by
## flow content.  |$element_state->{has_legend}| records that a
## |legend| was seen; |$element_state->{has_non_legend}| records that
## some content has already been seen, after which a |legend| is
## reported as misplaced.
$Element->{$HTML_NS}->{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_WD | FEATURE_WF2X,
    form => FEATURE_HTML5_LC | FEATURE_WF2X,
    lang => FEATURE_HTML5_REC,
    name => FEATURE_HTML5_LC,
  }),
  ## NOTE: legend, Flow
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## The interactive-content test is consulted only when the child is
    ## listed in |minus_elements|.
    my $is_excluded
        = $self->{minus_elements}->{$child_nsuri}->{$child_ln}
          && $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln);

    if ($is_excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## "Plus" elements are allowed and do not affect the state flags.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## A |legend| is reported if any other content (including another
      ## |legend|) has already been seen.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:details legend',
                         level => $self->{level}->{must})
          if $element_state->{has_non_legend};
      $element_state->{has_legend} = 1;
      $element_state->{has_non_legend} = 1;
    } else {
      ## Everything else is delegated to the flow content checker with
      ## the original argument list.
      $HTMLFlowContentChecker{check_child_element}->(@_);
      unless ($child_is_transparent) {
        $element_state->{has_non_legend} = 1;
      }
      ## TODO:
      ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
      ## should be an error, since |object| is allowed as flow,
      ## therefore |details| part of the content model does not match.
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Significant text counts as content preceding any later |legend|.
    if ($has_significant) {
      $element_state->{has_non_legend} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A |legend| child is mandatory.
    if (not $element_state->{has_legend}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'legend',
                         level => $self->{level}->{must});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
  ## NOTE: This definition is partially reused by |details| element's
  ## checker.
};
5652    
5653     $Element->{$HTML_NS}->{input} = {
5654 wakaba 1.119 %HTMLEmptyChecker,
5655 wakaba 1.187 status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
5656 wakaba 1.140 check_attrs => sub {
5657     my ($self, $item, $element_state) = @_;
5658 wakaba 1.142
5659 wakaba 1.145 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5660 wakaba 1.142 $state = 'text' unless defined $state;
5661     $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5662    
5663 wakaba 1.140 for my $attr (@{$item->{node}->attributes}) {
5664     my $attr_ns = $attr->namespace_uri;
5665     $attr_ns = '' unless defined $attr_ns;
5666     my $attr_ln = $attr->manakai_local_name;
5667     my $checker;
5668     my $status;
5669     if ($attr_ns eq '') {
5670     $status =
5671     {
5672     %HTMLAttrStatus,
5673     %HTMLM12NCommonAttrStatus,
5674     accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5675     'accept-charset' => FEATURE_HTML2X_RFC,
5676 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
5677 wakaba 1.140 action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5678     align => FEATURE_M12N10_REC_DEPRECATED,
5679     alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5680 wakaba 1.185 autocomplete => FEATURE_HTML5_LC | FEATURE_WF2X,
5681 wakaba 1.187 autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
5682     checked => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
5683 wakaba 1.140 datafld => FEATURE_HTML4_REC_RESERVED,
5684     dataformatas => FEATURE_HTML4_REC_RESERVED,
5685     datasrc => FEATURE_HTML4_REC_RESERVED,
5686 wakaba 1.187 disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5687 wakaba 1.140 enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5688 wakaba 1.187 form => FEATURE_HTML5_LC | FEATURE_WF2X,
5689 wakaba 1.178 height => FEATURE_HTML5_LC,
5690 wakaba 1.150 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5691     FEATURE_XHTMLBASIC11_CR,
5692 wakaba 1.140 ismap => FEATURE_M12N10_REC,
5693 wakaba 1.187 lang => FEATURE_HTML5_REC,
5694     list => FEATURE_HTML5_LC | FEATURE_WF2X,
5695     max => FEATURE_HTML5_LC | FEATURE_WF2X,
5696     maxlength => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5697 wakaba 1.140 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5698 wakaba 1.187 min => FEATURE_HTML5_LC | FEATURE_WF2X,
5699     multiple => FEATURE_HTML5_LC,
5700     name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
5701 wakaba 1.161 novalidate => FEATURE_HTML5_DEFAULT,
5702 wakaba 1.140 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5703     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5704     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5705     onformchange => FEATURE_WF2_INFORMATIVE,
5706     onforminput => FEATURE_WF2_INFORMATIVE,
5707     oninput => FEATURE_WF2,
5708     oninvalid => FEATURE_WF2,
5709     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5710 wakaba 1.187 pattern => FEATURE_HTML5_LC | FEATURE_WF2X,
5711     placeholder => FEATURE_HTML5_LC,
5712     readonly => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5713 wakaba 1.140 replace => FEATURE_WF2,
5714 wakaba 1.187 required => FEATURE_HTML5_LC | FEATURE_WF2X,
5715 wakaba 1.140 sdapref => FEATURE_HTML20_RFC,
5716 wakaba 1.187 size => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5717 wakaba 1.140 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5718 wakaba 1.187 step => FEATURE_HTML5_LC | FEATURE_WF2X,
5719 wakaba 1.140 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5720     target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5721 wakaba 1.161 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO:dropped
5722 wakaba 1.187 type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
5723 wakaba 1.140 usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
5724 wakaba 1.187 value => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
5725 wakaba 1.178 width => FEATURE_HTML5_LC,
5726 wakaba 1.140 }->{$attr_ln};
5727    
5728     $checker =
5729     {
5730 wakaba 1.141 ## NOTE: Value of an empty string means that the attribute is only
5731     ## applicable for a specific set of states.
5732 wakaba 1.142 accept => '',
5733 wakaba 1.149 'accept-charset' => $HTMLCharsetsAttrChecker,
5734     ## NOTE: To which states it applies is not defined in RFC 2070.
5735 wakaba 1.142 action => '',
5736 wakaba 1.150 align => '',
5737 wakaba 1.141 alt => '',
5738 wakaba 1.142 autocomplete => '',
5739 wakaba 1.165 autofocus => $AutofocusAttrChecker,
5740     ## NOTE: <input type=hidden disabled> is not disallowed.
5741 wakaba 1.142 checked => '',
5742     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
5743 wakaba 1.165 ## NOTE: <input type=hidden disabled> is not disallowed.
5744 wakaba 1.142 enctype => '',
5745     form => $HTMLFormAttrChecker,
5746 wakaba 1.178 height => '',
5747 wakaba 1.150 inputmode => '',
5748     ismap => '', ## NOTE: "MUST" be type=image [HTML4]
5749 wakaba 1.142 list => '',
5750     max => '',
5751     maxlength => '',
5752     method => '',
5753     min => '',
5754 wakaba 1.156 multiple => '',
5755 wakaba 1.165 name => $FormControlNameAttrChecker,
5756 wakaba 1.166 novalidate => '',
5757 wakaba 1.149 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
5758     onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
5759     oninput => $HTMLEventHandlerAttrChecker, # [WF2]
5760     oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
5761     ## TODO: tests for four attributes above
5762 wakaba 1.142 pattern => '',
5763 wakaba 1.156 placeholder => '',
5764 wakaba 1.142 readonly => '',
5765 wakaba 1.150 replace => '',
5766 wakaba 1.142 required => '',
5767     size => '',
5768     src => '',
5769     step => '',
5770     target => '',
5771 wakaba 1.140 type => $GetHTMLEnumeratedAttrChecker->({
5772 wakaba 1.156 hidden => 1, text => 1, search => 1, url => 1,
5773     email => 1, password => 1,
5774 wakaba 1.141 datetime => 1, date => 1, month => 1, week => 1, time => 1,
5775 wakaba 1.157 'datetime-local' => 1, number => 1, range => 1, color => 1,
5776     checkbox => 1,
5777 wakaba 1.141 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
5778     button => 1,
5779 wakaba 1.140 }),
5780 wakaba 1.151 usemap => '',
5781 wakaba 1.142 value => '',
5782 wakaba 1.178 width => '',
5783 wakaba 1.140 }->{$attr_ln};
5784 wakaba 1.141
5785     ## State-dependent checkers
5786     unless ($checker) {
5787     if ($state eq 'hidden') {
5788     $checker =
5789     {
5790 wakaba 1.142 value => sub {
5791     my ($self, $attr, $item, $element_state) = @_;
5792 wakaba 1.145 my $name = $item->{node}->get_attribute_ns (undef, 'name');
5793 wakaba 1.142 if (defined $name and $name eq '_charset_') { ## case-sensitive
5794     $self->{onerror}->(node => $attr,
5795     type => '_charset_ value',
5796     level => $self->{level}->{must});
5797     }
5798     },
5799 wakaba 1.141 }->{$attr_ln} || $checker;
5800 wakaba 1.142 ## TODO: Warn if no name attribute?
5801     ## TODO: Warn if name!=_charset_ and no value attribute?
5802 wakaba 1.168 } elsif ({
5803     datetime => 1, date => 1, month => 1, time => 1,
5804     week => 1, 'datetime-local' => 1,
5805     }->{$state}) {
5806     my $v = {
5807     datetime => ['global_date_and_time_string'],
5808     date => ['date_string'],
5809     month => ['month_string'],
5810     week => ['week_string'],
5811     time => ['time_string'],
5812     'datetime-local' => ['local_date_and_time_string'],
5813     }->{$state};
5814 wakaba 1.144 $checker =
5815     {
5816 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5817     on => 1, off => 1,
5818     }),
5819 wakaba 1.158 list => $ListAttrChecker,
5820 wakaba 1.168 min => $GetDateTimeAttrChecker->($v->[0]),
5821     max => $GetDateTimeAttrChecker->($v->[0]),
5822 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5823 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5824 wakaba 1.148 step => $StepAttrChecker,
5825 wakaba 1.168 value => $GetDateTimeAttrChecker->($v->[0]),
5826 wakaba 1.144 }->{$attr_ln} || $checker;
5827     } elsif ($state eq 'number') {
5828     $checker =
5829     {
5830 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5831     on => 1, off => 1,
5832     }),
5833 wakaba 1.158 list => $ListAttrChecker,
5834 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5835     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5836 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5837 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5838 wakaba 1.148 step => $StepAttrChecker,
5839 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5840 wakaba 1.144 }->{$attr_ln} || $checker;
5841     } elsif ($state eq 'range') {
5842     $checker =
5843     {
5844 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5845     on => 1, off => 1,
5846     }),
5847 wakaba 1.158 list => $ListAttrChecker,
5848 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5849     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5850 wakaba 1.148 step => $StepAttrChecker,
5851 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5852 wakaba 1.144 }->{$attr_ln} || $checker;
5853 wakaba 1.157 } elsif ($state eq 'color') {
5854     $checker =
5855     {
5856     autocomplete => $GetHTMLEnumeratedAttrChecker->({
5857     on => 1, off => 1,
5858     }),
5859 wakaba 1.158 list => $ListAttrChecker,
5860 wakaba 1.157 value => sub {
5861     my ($self, $attr) = @_;
5862     unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
5863     $self->{onerror}->(node => $attr,
5864     type => 'scolor:syntax error', ## TODOC: type
5865     level => $self->{level}->{must});
5866     }
5867     },
5868     }->{$attr_ln} || $checker;
5869 wakaba 1.144 } elsif ($state eq 'checkbox' or $state eq 'radio') {
5870     $checker =
5871     {
5872 wakaba 1.149 checked => $GetHTMLBooleanAttrChecker->('checked'),
5873     ## TODO: tests
5874 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5875 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5876     }->{$attr_ln} || $checker;
5877     ## TODO: There MUST be another input type=radio with same
5878     ## name (Radio state).
5879     ## ISSUE: There should be exactly one type=radio with checked?
5880     } elsif ($state eq 'file') {
5881     $checker =
5882     {
5883 wakaba 1.161 accept => $AcceptAttrChecker,
5884 wakaba 1.168 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
5885 wakaba 1.159 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
5886 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5887 wakaba 1.144 }->{$attr_ln} || $checker;
5888     } elsif ($state eq 'submit') {
5889     $checker =
5890     {
5891 wakaba 1.149 action => $HTMLURIAttrChecker,
5892 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5893     'application/x-www-form-urlencoded' => 1,
5894     'multipart/form-data' => 1,
5895     'text/plain' => 1,
5896     }),
5897 wakaba 1.149 method => $GetHTMLEnumeratedAttrChecker->({
5898     get => 1, post => 1, put => 1, delete => 1,
5899     }),
5900 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5901 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5902     document => 1, values => 1,
5903     }),
5904     target => $HTMLTargetAttrChecker,
5905 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5906     }->{$attr_ln} || $checker;
5907     } elsif ($state eq 'image') {
5908     $checker =
5909     {
5910 wakaba 1.149 action => $HTMLURIAttrChecker,
5911     align => $GetHTMLEnumeratedAttrChecker->({
5912     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
5913     }),
5914 wakaba 1.144 alt => sub {
5915     my ($self, $attr) = @_;
5916     my $value = $attr->value;
5917     unless (length $value) {
5918     $self->{onerror}->(node => $attr,
5919     type => 'empty anchor image alt',
5920     level => $self->{level}->{must});
5921     }
5922     },
5923 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5924     'application/x-www-form-urlencoded' => 1,
5925     'multipart/form-data' => 1,
5926     'text/plain' => 1,
5927     }),
5928 wakaba 1.178 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
5929 wakaba 1.149 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
5930     method => $GetHTMLEnumeratedAttrChecker->({
5931     get => 1, post => 1, put => 1, delete => 1,
5932     }),
5933 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5934 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5935     document => 1, values => 1,
5936     }),
5937 wakaba 1.144 src => $HTMLURIAttrChecker,
5938     ## TODO: There is requirements on the referenced resource.
5939 wakaba 1.149 target => $HTMLTargetAttrChecker,
5940     usemap => $HTMLUsemapAttrChecker,
5941 wakaba 1.178 width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
5942 wakaba 1.144 }->{$attr_ln} || $checker;
5943     ## TODO: alt & src are required.
5944     } elsif ({
5945     reset => 1, button => 1,
5946     ## NOTE: From Web Forms 2.0:
5947     remove => 1, 'move-up' => 1, 'move-down' => 1,
5948     add => 1,
5949     }->{$state}) {
5950     $checker =
5951     {
5952     ## NOTE: According to Web Forms 2.0, |input| attribute
5953     ## has |template| attribute to support the |add| button
5954     ## type (as part of the repetition template feature). It
5955     ## conflicts with the |template| global attribute
5956     ## introduced as part of the data template feature.
5957     ## NOTE: |template| attribute as defined in Web Forms 2.0
5958     ## has no author requirement.
5959     value => sub { }, ## NOTE: No restriction.
5960     }->{$attr_ln} || $checker;
5961 wakaba 1.156 } else { # Text, Search, E-mail, URL, Password
5962 wakaba 1.141 $checker =
5963     {
5964 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5965     on => 1, off => 1,
5966     }),
5967 wakaba 1.149 ## TODO: inputmode [WF2]
5968 wakaba 1.158 list => $ListAttrChecker,
maxlength => sub {
  ## Attribute checker for |maxlength|.  First validates the attribute
  ## value as a non-negative integer, then reports the |value|
  ## attribute if it is longer than the parsed maximum.
  my ($self, $attr, $item, $element_state) = @_;

  ## Syntax check; any non-negative integer is accepted.
  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);

  ## NOTE: Only when applying the rules for parsing non-negative
  ## integers results in a number is the length comparison possible.
  return unless $attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/;
  my $limit = 0+$1;

  ## Nothing to compare against if there is no |value| attribute.
  my $current = $item->{node}->get_attribute_ns (undef, 'value');
  return unless defined $current;

  ## The length is counted by |length| on the attribute value.
  return unless length ($current) > $limit;

  $self->{onerror}
      ->(node => $item->{node}->get_attribute_node_ns (undef, 'value'),
         type => 'value too long',
         level => $self->{level}->{must});
},
5993 wakaba 1.160 pattern => $PatternAttrChecker,
5994 wakaba 1.179 placeholder => $PlaceholderAttrChecker,
5995 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5996 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5997 wakaba 1.147 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
value => sub {
  ## Attribute checker for |value| in the Text, Search, E-mail, URL,
  ## and Password states.  The enclosing |$state| selects the rule:
  ##   - url:   delegated to |$HTMLURIAttrChecker|.
  ##   - email: the value (or, with |multiple|, each comma-separated
  ##            token) must match |$ValidEmailAddress|.
  ##   - other: the value must not contain U+000D or U+000A.
  my ($self, $attr, $item, $element_state) = @_;

  if ($state eq 'url') {
    $HTMLURIAttrChecker->(@_);
  } elsif ($state eq 'email') {
    if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
      ## The limit of -1 preserves trailing empty fields, so that a
      ## trailing comma yields an (invalid) empty token.  An empty
      ## attribute value splits into no fields at all, hence the
      ## explicit single empty token.
      my @addr = split /,/, $attr->value, -1;
      @addr = ('') unless @addr;
      for my $addr (@addr) {
        ## Strip ALL leading and trailing space characters from the
        ## token.  (The trailing pattern formerly lacked the |+|
        ## quantifier and removed a single character only.)
        $addr =~ s/\A[\x09\x0A\x0C\x0D\x20]+//;
        $addr =~ s/[\x09\x0A\x0C\x0D\x20]+\z//;
        unless ($addr =~ /\A$ValidEmailAddress\z/) {
          $self->{onerror}->(node => $attr,
                             type => 'email:syntax error', ## TODO: type
                             value => $addr,
                             level => $self->{level}->{must});
        }
      }
    } else {
      ## Without |multiple| the whole value is matched as is; no
      ## whitespace is stripped.
      unless ($attr->value =~ /\A$ValidEmailAddress\z/) {
        $self->{onerror}->(node => $attr,
                           type => 'email:syntax error', ## TODO: type
                           level => $self->{level}->{must});
      }
    }
  } else {
    if ($attr->value =~ /[\x0D\x0A]/) {
      $self->{onerror}->(node => $attr,
                         type => 'newline in value', ## TODO: type
                         level => $self->{level}->{must});
    }
  }
},
6031 wakaba 1.141 }->{$attr_ln} || $checker;
6032 wakaba 1.147 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
6033 wakaba 1.156 $checker = $GetHTMLBooleanAttrChecker->('multiple')
6034     if $state eq 'email' and $attr_ln eq 'multiple';
6035 wakaba 1.161
6036     if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6037     not $item->{node}->has_attribute_ns (undef, 'title')) {
6038     $self->{onerror}->(node => $item->{node},
6039     type => 'attribute missing',
6040     text => 'title',
6041     level => $self->{level}->{should});
6042     }
6043 wakaba 1.141 }
6044     }
6045    
6046     if (defined $checker) {
6047     if ($checker eq '') {
6048     $checker = sub {
6049     my ($self, $attr) = @_;
6050     $self->{onerror}->(node => $attr,
6051     type => 'input attr not applicable',
6052     text => $state,
6053     level => $self->{level}->{must});
6054     };
6055     }
6056 wakaba 1.140 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
6057     $attr_ln !~ /[A-Z]/) {
6058     $checker = $HTMLDatasetAttrChecker;
6059     $status = $HTMLDatasetAttrStatus;
6060     } else {
6061     $checker = $HTMLAttrChecker->{$attr_ln};
6062     }
6063     }
6064     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
6065     || $AttrChecker->{$attr_ns}->{''};
6066     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
6067     || $AttrStatus->{$attr_ns}->{''};
6068     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
6069 wakaba 1.157
6070 wakaba 1.140 if ($checker) {
6071     $checker->($self, $attr, $item, $element_state) if ref $checker;
6072     } elsif ($attr_ns eq '' and not $status) {
6073     #
6074     } else {
6075     $self->{onerror}->(node => $attr,
6076     type => 'unknown attribute',
6077     level => $self->{level}->{uncertain});
6078     ## ISSUE: No conformance criteria for unknown attributes in the spec
6079     }
6080    
6081     $self->_attr_status_info ($attr, $status);
6082     }
6083 wakaba 1.168
6084     ## ISSUE: -0/+0
6085    
6086     if ($state eq 'range') {
6087     $element_state->{number_value}->{min} ||= 0;
6088     $element_state->{number_value}->{max} = 100
6089     unless defined $element_state->{number_value}->{max};
6090     }
6091    
6092     if (defined $element_state->{date_value}->{min} or
6093     defined $element_state->{date_value}->{max}) {
6094     my $min_value = $element_state->{date_value}->{min};
6095     my $max_value = $element_state->{date_value}->{max};
6096     my $value_value = $element_state->{date_value}->{value};
6097    
6098     if (defined $min_value and $min_value eq '' and
6099     (defined $max_value or defined $value_value)) {
6100     my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6101     $self->{onerror}->(node => $min,
6102     type => 'date value not supported', ## TODOC: type
6103     value => $min->value,
6104     level => $self->{level}->{unsupported});
6105     undef $min_value;
6106     }
6107     if (defined $max_value and $max_value eq '' and
6108     (defined $max_value or defined $value_value)) {
6109     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6110     $self->{onerror}->(node => $max,
6111     type => 'date value not supported', ## TODOC: type
6112     value => $max->value,
6113     level => $self->{level}->{unsupported});
6114     undef $max_value;
6115     }
6116     if (defined $value_value and $value_value eq '' and
6117     (defined $max_value or defined $min_value)) {
6118     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6119     $self->{onerror}->(node => $value,
6120     type => 'date value not supported', ## TODOC: type
6121     value => $value->value,
6122     level => $self->{level}->{unsupported});
6123     undef $value_value;
6124     }
6125    
6126     if (defined $min_value and defined $max_value) {
6127     if ($min_value->to_html5_number > $max_value->to_html5_number) {
6128     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6129     $self->{onerror}->(node => $max,
6130     type => 'max lt min', ## TODOC: type
6131     level => $self->{level}->{must});
6132     }
6133     }
6134    
6135     if (defined $min_value and defined $value_value) {
6136     if ($min_value->to_html5_number > $value_value->to_html5_number) {
6137     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6138     $self->{onerror}->(node => $value,
6139     type => 'value lt min', ## TODOC: type
6140     level => $self->{level}->{warn});
6141     ## NOTE: Not an error.
6142     }
6143     }
6144    
6145     if (defined $max_value and defined $value_value) {
6146     if ($max_value->to_html5_number < $value_value->to_html5_number) {
6147     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6148     $self->{onerror}->(node => $value,
6149     type => 'value gt max', ## TODOC: type
6150     level => $self->{level}->{warn});
6151     ## NOTE: Not an error.
6152     }
6153     }
6154     } elsif (defined $element_state->{number_value}->{min} or
6155     defined $element_state->{number_value}->{max}) {
6156     my $min_value = $element_state->{number_value}->{min};
6157     my $max_value = $element_state->{number_value}->{max};
6158     my $value_value = $element_state->{number_value}->{value};
6159    
6160     if (defined $min_value and defined $max_value) {
6161     if ($min_value > $max_value) {
6162     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6163     $self->{onerror}->(node => $max,
6164     type => 'max lt min', ## TODOC: type
6165     level => $self->{level}->{must});
6166     }
6167     }
6168    
6169     if (defined $min_value and defined $value_value) {
6170     if ($min_value > $value_value) {
6171     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6172     $self->{onerror}->(node => $value,
6173     type => 'value lt min', ## TODOC: type
6174     level => $self->{level}->{warn});
6175     ## NOTE: Not an error.
6176     }
6177     }
6178    
6179     if (defined $max_value and defined $value_value) {
6180     if ($max_value < $value_value) {
6181     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6182     $self->{onerror}->(node => $value,
6183     type => 'value gt max', ## TODOC: type
6184     level => $self->{level}->{warn});
6185     ## NOTE: Not an error.
6186     }
6187     }
6188     }
6189 wakaba 1.150
6190 wakaba 1.168 ## TODO: Warn unless value = min * x where x is an integer.
6191    
6192 wakaba 1.150 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6193     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6194     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6195     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6196     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6197 wakaba 1.140 },
6198 wakaba 1.66 check_start => sub {
6199     my ($self, $item, $element_state) = @_;
6200 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6201     $self->{onerror}->(node => $item->{node},
6202     type => 'multiple labelable fae',
6203     level => $self->{level}->{must});
6204     } else {
6205     $self->{flag}->{has_labelable} = 2;
6206     }
6207 wakaba 1.138
6208     $element_state->{id_type} = 'labelable';
6209 wakaba 1.66 },
6210 wakaba 1.52 };
6211    
6212 wakaba 1.178 ## XXXresource: Dimension attributes have requirements on width and
6213     ## height of referenced resource.
6214 wakaba 1.80
## Checker definition for the HTML |button| element: per-attribute
## syntax checkers, the attribute feature-status table, and the
## start-of-element hook.
$Element->{$HTML_NS}->{button} = {
  %HTMLPhrasingContentChecker, ## ISSUE: -interactive?
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## ISSUE: HTML5 has no "MUST NOT" against using |action|,
    ## |method|, |enctype|, |target|, and |novalidate| on |button|
    ## elements whose |type| is not |submit|.
    action => $HTMLURIAttrChecker,
    autofocus => $AutofocusAttrChecker,
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    enctype => $GetHTMLEnumeratedAttrChecker->({
      'application/x-www-form-urlencoded' => 1,
      'multipart/form-data' => 1,
      'text/plain' => 1,
    }),
    form => $HTMLFormAttrChecker,
    method => $GetHTMLEnumeratedAttrChecker->(+{
      map { ($_ => 1) } qw(get post put delete)
    }),
    name => $FormControlNameAttrChecker,
    novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
    replace => $GetHTMLEnumeratedAttrChecker->(+{
      map { ($_ => 1) } qw(document values)
    }),
    target => $HTMLTargetAttrChecker,
    ## NOTE: Web Forms 2.0 gives |button| a |template| attribute in
    ## support of the |add| button type (repetition template feature).
    ## It conflicts with the |template| global attribute introduced as
    ## part of the data template feature.
    ## NOTE: The Web Forms 2.0 |template| attribute has no author
    ## requirement.
    type => $GetHTMLEnumeratedAttrChecker->(+{
      map { ($_ => 1) } qw(button submit reset)
    }),
    value => sub {}, ## NOTE: No restriction.
  }, {
    ## The shared tables come first so that the entries below
    ## override them.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
    action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form => FEATURE_HTML5_LC | FEATURE_WF2X,
    lang => FEATURE_HTML5_REC,
    method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    novalidate => FEATURE_HTML5_DEFAULT,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput => FEATURE_WF2_INFORMATIVE,
    replace => FEATURE_WF2,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO: dropped
    type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    value => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Report the element when both the |has_label| and the
    ## |has_labelable| flags are already set; otherwise record this
    ## element by setting |has_labelable| to 2.
    if (not $self->{flag}->{has_label} or
        not $self->{flag}->{has_labelable}) {
      $self->{flag}->{has_labelable} = 2;
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "The value attribute must not be present unless the form
    ## [content] attribute is present.": Wrong?

    ## Kinds of URL carried by the URL-valued attributes.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(datasrc template ref);

    $element_state->{id_type} = 'labelable';
  },
};
6298    
## Checker definition for the HTML |label| element.  Besides the
## attribute tables, the start/end hooks maintain the |has_label| and
## |has_labelable| flags while the element's content is checked.
$Element->{$HTML_NS}->{label} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    for => sub {
      my ($self, $attr) = @_;

      ## NOTE: The value MUST be an ID of a labelable element.  The
      ## reference is only queued on |idref| here.
      push @{$self->{idref}}, ['labelable', $attr->value, $attr];
    },
    form => $HTMLFormAttrChecker,
  }, {
    ## The shared tables come first so that the entries below
    ## override them.
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_WF2 | FEATURE_M12N10_REC,
    for => FEATURE_HTML5_REC,
    form => FEATURE_HTML5_LC,
    lang => FEATURE_HTML5_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## A |label| element is registered as a minus element for its
    ## own content.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {label => 1}});

    ## Save the flags as they were outside this element so that
    ## |check_end| can restore them.
    $element_state->{has_label_original} = $self->{flag}->{has_label};
    $element_state->{has_labelable_original}
        = $self->{flag}->{has_labelable};

    ## |has_labelable| starts at 1 when |for| is specified and at 0
    ## otherwise.
    $self->{flag}->{has_label} = 1;
    if ($item->{node}->has_attribute_ns (undef, 'for')) {
      $self->{flag}->{has_labelable} = 1;
    } else {
      $self->{flag}->{has_labelable} = 0;
    }

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## A flag value still equal to 1 means: has for="" but no
    ## labelable descendant; restore the outer value in that case.
    $self->{flag}->{has_labelable}
        = $element_state->{has_labelable_original}
            if $self->{flag}->{has_labelable} == 1;

    unless ($element_state->{has_label_original}) {
      delete $self->{flag}->{has_label};
    }
    ## TODO: Warn if no labelable descendant? <input type=hidden>?

    ## NOTE: |<label for=a><input id=a></label>| is non-conforming.

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
  ## TODO: Tests for <nest/> in <label>
};
6352    
## Checker definition for the HTML |select| element.  The content
## model enforced by the child hooks is (option | optgroup)* with no
## significant text.
$Element->{$HTML_NS}->{select} = {
  %HTMLChecker,
  ## ISSUE: HTML5 has no requirement like these:
  ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE
  ## case [HTML4].
  ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is
  ## "error" [HTML4]
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->({
    autofocus => $AutofocusAttrChecker,
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    form => $HTMLFormAttrChecker,
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
    name => $FormControlNameAttrChecker,
    ## TODO: tests for on*
    onformchange => $HTMLEventHandlerAttrChecker,
    onforminput => $HTMLEventHandlerAttrChecker,
    oninput => $HTMLEventHandlerAttrChecker,
    oninvalid => $HTMLEventHandlerAttrChecker,
    ## The callback accepts only values greater than zero.
    size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    ## The shared tables come first so that the entries below
    ## override them.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_WF2,
    autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
    data => FEATURE_WF2,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    form => FEATURE_HTML5_LC | FEATURE_WF2X,
    lang => FEATURE_HTML5_REC,
    multiple => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput => FEATURE_WF2_INFORMATIVE,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninput => FEATURE_WF2,
    oninvalid => FEATURE_WF2,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    size => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Report the element when both the |has_label| and the
    ## |has_labelable| flags are already set; otherwise record this
    ## element by setting |has_labelable| to 2.
    if (not $self->{flag}->{has_label} or
        not $self->{flag}->{has_labelable}) {
      $self->{flag}->{has_labelable} = 2;
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}->{must});
    }

    ## All URL-valued attributes here refer to resources.
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(data datasrc template ref);

    $element_state->{id_type} = 'labelable';
  },
  check_child_element => sub {
    ## NOTE: (option | optgroup)*

    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error, if any, applies; report it once below.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($child_nsuri eq $HTML_NS and
             ($child_ln eq 'option' or $child_ln eq 'optgroup')) {
      ## Part of the content model.
    } else {
      $error_type = 'element not allowed';
    }

    $self->{onerror}->(node => $child_el, type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only significant text is an error.
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
6446 wakaba 1.1
## Checker definition for the HTML |datalist| element.  The content
## model is either phrasing content or zero or more |option| elements;
## |$element_state->{phase}| records which alternative has been chosen
## (|any| until the first child decides).
$Element->{$HTML_NS}->{datalist} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
  }, {
    %HTMLAttrStatus,
    data => FEATURE_WF2,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{phase} = 'any'; # any | phrasing | option

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(data template ref);

    $element_state->{id_type} = 'datalist';
  },
  ## NOTE: phrasing | option*
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error, if any, applies; report it once below.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($element_state->{phase} eq 'phrasing') {
      $error_type = 'element not allowed:phrasing'
          unless $HTMLPhrasingContent->{$child_nsuri}->{$child_ln};
    } elsif ($element_state->{phase} eq 'option') {
      $error_type = 'element not allowed'
          unless $child_nsuri eq $HTML_NS and $child_ln eq 'option';
    } elsif ($element_state->{phase} eq 'any') {
      ## The first child element selects the alternative.
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{phase} = 'phrasing';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $element_state->{phase} = 'option';
      } else {
        $error_type = 'element not allowed';
      }
    } else {
      die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
    }

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    if ($has_significant) {
      if ($element_state->{phase} eq 'phrasing') {
        ## Text is fine in phrasing content.
      } elsif ($element_state->{phase} eq 'any') {
        ## Significant text selects the phrasing alternative.
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    if ($element_state->{phase} eq 'phrasing') {
      if ($element_state->{has_significant}) {
        ## Propagate the fact to the parent's state.
        $item->{real_parent_state}->{has_significant} = 1;
      } elsif ($item->{transparent}) {
        ## No report for a transparent item.
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'no significant content',
                           level => $self->{level}->{should});
      }
    } else {
      ## NOTE: The content model explicitly allows a |datalist|
      ## element to be empty, so no "no significant content" error is
      ## raised when there is no child element.  (An error should be
      ## raised for |<datalist><br></datalist>|, however.)
      ## NOTE: As a side effect, when the |datalist| element contains
      ## only non-conforming content, the |phase| flag is still |any|
      ## and no "no significant content" error is raised either.
      $HTMLChecker{check_end}->(@_);
    }
  },
};
6546 wakaba 1.49
## Checker definition for the HTML |optgroup| element.  The |label|
## attribute is required; the only child elements accepted by the
## content model are |option| elements, with no significant text.
$Element->{$HTML_NS}->{optgroup} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub {}, ## No check on the value itself.
  }, {
    ## The shared tables come first so that the entries below
    ## override them.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    label => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
  }),
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    ## |label| must be present.
    if (not $item->{node}->has_attribute_ns (undef, 'label')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'label',
                         level => $self->{level}->{must});
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error, if any, applies; report it once below.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
      ## Part of the content model.
    } else {
      $error_type = 'element not allowed';
    }

    $self->{onerror}->(node => $child_el, type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only significant text is an error.
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
6595    
## Checker definition for the HTML |option| element: the content
## hooks come from |%HTMLTextChecker|; only the attribute tables are
## specific to this element.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      label => sub {}, ## NOTE: No restriction.
      ## ISSUE: Not a "boolean attribute"
      selected => $GetHTMLBooleanAttrChecker->('selected'),
      value => sub {}, ## NOTE: No restriction.
    },
    {
      ## The shared tables come first so that the entries below
      ## override them.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
      label => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
      selected => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    },
  ),
};
6616 wakaba 1.49
6617 wakaba 1.52 $Element->{$HTML_NS}->{textarea} = {
6618     %HTMLTextChecker,
6619 wakaba 1.187 status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
6620 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6621 wakaba 1.164 accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
6622 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6623 wakaba 1.164 cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6624 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6625 wakaba 1.136 form => $HTMLFormAttrChecker,
6626 wakaba 1.56 ## TODO: inputmode [WF2]
6627 wakaba 1.164 maxlength => sub {
6628     my ($self, $attr, $item, $element_state) = @_;
6629    
6630     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
6631    
6632 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
6633 wakaba 1.164 ## NOTE: Applying the rules for parsing non-negative integers
6634     ## results in a number.
6635     my $max_allowed_value_length = 0+$1;
6636    
6637     ## ISSUE: "The the purposes of this requirement," (typo)
6638    
6639     ## ISSUE: This constraint is applied w/o CRLF normalization to
6640     ## |value| attribute, but w/ CRLF normalization to
6641     ## concept-value.
6642     my $value = $item->{node}->text_content;
6643     if (defined $value) {
6644     my $codepoint_length = length $value;
6645    
6646     if ($codepoint_length > $max_allowed_value_length) {
6647     $self->{onerror}->(node => $item->{node},
6648     type => 'value too long',
6649     level => $self->{level}->{must});
6650     }
6651     }
6652     }
6653     },
6654 wakaba 1.165 name => $FormControlNameAttrChecker,
6655 wakaba 1.164 onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
6656     onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6657     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6658 wakaba 1.161 pattern => $PatternAttrChecker,
6659 wakaba 1.179 placeholder => $PlaceholderAttrChecker,
6660 wakaba 1.52 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6661 wakaba 1.56 required => $GetHTMLBooleanAttrChecker->('required'),
6662 wakaba 1.164 rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6663     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6664     oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
6665 wakaba 1.161 ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
6666 wakaba 1.56 wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
6667 wakaba 1.52 }, {
6668     %HTMLAttrStatus,
6669     %HTMLM12NCommonAttrStatus,
6670 wakaba 1.164 accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6671 wakaba 1.61 'accept-charset' => FEATURE_HTML2X_RFC,
6672 wakaba 1.176 accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
6673 wakaba 1.187 autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
6674     cols => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
6675 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6676 wakaba 1.49 dataformatas => FEATURE_HTML4_REC_RESERVED,
6677     datasrc => FEATURE_HTML4_REC_RESERVED,
6678 wakaba 1.187 disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
6679     form => FEATURE_HTML5_LC | FEATURE_WF2X,
6680 wakaba 1.164 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
6681 wakaba 1.187 lang => FEATURE_HTML5_REC,
6682 wakaba 1.121 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6683 wakaba 1.187 name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
6684 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6685     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6686     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6687 wakaba 1.164 onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6688     onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6689     oninput => FEATURE_WF2, ## TODO: tests
6690     oninvalid => FEATURE_WF2, ## TODO: tests
6691 wakaba 1.52 onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6692 wakaba 1.161 pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6693 wakaba 1.179 placeholder => FEATURE_HTML5_LC,
6694 wakaba 1.187 readonly => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
6695     required => FEATURE_HTML5_LC | FEATURE_WF2X,
6696     rows => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
6697 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6698     sdapref => FEATURE_HTML20_RFC,
6699 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6700 wakaba 1.187 wrap => FEATURE_HTML5_LC | FEATURE_WF2X,
6701 wakaba 1.52 }),
6702 wakaba 1.66 check_start => sub {
6703     my ($self, $item, $element_state) = @_;
6704 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6705     $self->{onerror}->(node => $item->{node},
6706     type => 'multiple labelable fae',
6707     level => $self->{level}->{must});
6708     } else {
6709     $self->{flag}->{has_labelable} = 2;
6710     }
6711 wakaba 1.164
6712     $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6713     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6714     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6715    
6716     $element_state->{id_type} = 'labelable';
6717     },
6718     check_attrs2 => sub {
6719     my ($self, $item, $element_state) = @_;
6720 wakaba 1.66
6721 wakaba 1.161 if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6722     not $item->{node}->has_attribute_ns (undef, 'title')) {
6723     ## NOTE: WF2 (dropped by HTML5)
6724     $self->{onerror}->(node => $item->{node},
6725     type => 'attribute missing',
6726     text => 'title',
6727     level => $self->{level}->{should});
6728     }
6729    
6730 wakaba 1.164 unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
6731     my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
6732     if (defined $wrap) {
6733     $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
6734     if ($wrap eq 'hard') {
6735     $self->{onerror}->(node => $item->{node},
6736     type => 'attribute missing',
6737     text => 'cols',
6738     level => $self->{level}->{must});
6739     }
6740     }
6741     }
6742 wakaba 1.66 },
6743 wakaba 1.52 };
6744 wakaba 1.49
## Checker definition for the HTML |output| element.  Content model
## and default callbacks come from |%HTMLPhrasingContentChecker|.
$Element->{$HTML_NS}->{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |for|: "Unordered set of unique space-separated tokens".  The
    ## first occurrence of each token is queued in |$self->{idref}| as
    ## |['any', $token, $attr]|; a repeated token is reported as a
    ## |duplicate token| error on the attribute node.
    for => sub {
      my ($self, $attr) = @_;

      my %seen;
      for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
        ## A leading separator makes |split| return an empty first field.
        next unless length $token;

        if ($seen{$token}) {
          $self->{onerror}->(node => $attr, type => 'duplicate token',
                             value => $token,
                             level => $self->{level}{must});
        } else {
          $seen{$token} = 1;
          push @{$self->{idref}}, ['any', $token, $attr];
        }
      }
    },
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
  }, {
    ## Attribute status table; entries below override |%HTMLAttrStatus|.
    %HTMLAttrStatus,
    for => FEATURE_HTML5_LC | FEATURE_WF2X,
    form => FEATURE_HTML5_LC | FEATURE_WF2X,
    name => FEATURE_HTML5_LC | FEATURE_WF2X,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  }),
};
6781    
## Checker definition for the (deprecated) HTML |isindex| element.
## It is treated as an empty element via |%HTMLEmptyChecker|.
$Element->{$HTML_NS}->{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->({
    prompt => sub {}, ## NOTE: Text [M12N]
  }, {
    ## Attribute status table; entries below override |%HTMLAttrStatus|.
    %HTMLAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    style => FEATURE_HTML5_REC,
    title => FEATURE_HTML5_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register how URI-valued attributes of this element are used:
    ## |action| is flagged as an action, |template| and |ref| as
    ## resources.
    $element_state->{uri_info}{action}{type}{action} = 1;
    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw(template ref);
  },
};
6809 wakaba 1.49
## Checker definition for the HTML |script| element.
##
## With |src| the element must be empty; without it, the text content
## is collected and handed to |$self->{onsubdoc}| at the end of the
## element, tagged with the media type derived from |type| / |language|.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |charset| is only allowed together with |src|; its value is then
    ## validated by |$HTMLCharsetChecker|.
    charset => sub {
      my ($self, $attr) = @_;

      $self->{onerror}->(type => 'attribute not allowed',
                         node => $attr,
                         level => $self->{level}{must})
          unless $attr->owner_element->has_attribute_ns (undef, 'src');

      ## XXXresource: MUST match the charset of the referenced
      ## resource (HTML5 revision 2967).

      $HTMLCharsetChecker->($attr->value, @_);
    },
    language => sub {}, ## NOTE: No syntax constraint according to HTML4.
    src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
  }, {
    ## Attribute status table; entries below override |%HTMLAttrStatus|.
    %HTMLAttrStatus,
    async => FEATURE_HTML5_WD,
    charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    href => FEATURE_RDFA_REC,
    id => FEATURE_HTML5_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  ## Cross-attribute check: |defer| requires |src|.
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    my $script_el = $item->{node};
    if ($script_el->has_attribute_ns (undef, 'defer')) {
      unless ($script_el->has_attribute_ns (undef, 'src')) {
        $self->{onerror}->(node => $script_el,
                           type => 'attribute missing',
                           text => 'src',
                           level => $self->{level}{must});
      }
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $script_el = $item->{node};

    if ($script_el->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $script_el->get_attribute_ns (undef, 'type');
      my $language = $script_el->get_attribute_ns (undef, 'language');

      ## Decide the media type of the inline script.  An empty |type|
      ## or an empty |language| selects the default; otherwise |type|
      ## wins over |language|.
      my $script_type = 'text/javascript';
      unless ((defined $type and $type eq '') or
              (defined $language and $language eq '')) {
        if (defined $type) {
          $script_type = $type;
        } elsif (defined $language) {
          $script_type = 'text/' . $language;
        }
      }

      ## If the value looks like |type/subtype| (optionally followed by
      ## parameters), normalize it to lowercase |type/subtype|.
      if ($script_type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
        $script_type = "$1/$2";
        $script_type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
        ## TODO: Though we strip parameters here, they should not be
        ## ignored for the purpose of conformance checking...
      }
      $element_state->{script_type} = $script_type;
    }

    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw(src template ref);

    ## Accumulator for the text content (filled by |check_child_text|).
    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln} and
             $element_state->{must_be_empty}) {
      ## Child elements are only rejected when |src| is specified.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:empty',
                         level => $self->{level}{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed:empty',
                         level => $self->{level}{must})
          if $has_significant;
    }
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{must_be_empty}) {
      my $script_type = $element_state->{script_type};
      if ($script_type =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML script lang',
                           text => $script_type,
                           level => $self->{level}{uncertain});
        ## ISSUE: Should we raise some kind of error for
        ## <script type="text/xml">aaaaa</script>?
        ## NOTE: ^^^ This is why we throw an "uncertain" error.
      } else {
        ## Hand the collected script text over as a subdocument.
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $script_type,
                             is_char_string => 1});
      }

      $HTMLChecker{check_end}->(@_);
    }
  },
  ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
  ## NOTE: "When used to include script data, the script data must be embedded
  ## inline, the format of the data must be given using the type attribute,
  ## and the src attribute must not be specified." - not testable.
  ## TODO: It would be possible to err <script type=text/plain src=...>
};
6949 wakaba 1.25 ## ISSUE: Significant check and text child node
6950 wakaba 1.1
6951     ## NOTE: When script is disabled.
## Checker definition for the HTML |noscript| element.  Which rules
## apply depends on |$self->{flag}->{in_head}|: if it is set, only a
## restricted set of child elements is accepted; otherwise the
## callbacks of |%HTMLTransparentChecker| are used.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## The element is reported as an error when the owner document is
    ## not an HTML document.
    $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript',
                       level => $self->{level}{must})
        unless $item->{node}->owner_document->manakai_is_html;

    ## Unless the |in_head| flag is set, nested |noscript| is excluded.
    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {noscript => 1}})
        unless $self->{flag}->{in_head};

    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw(template ref);
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{flag}->{in_head}) {
      ## Shared reporter for children that the |in_head| rules reject.
      my $report_not_allowed = sub {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head noscript',
                           level => $self->{level}{must});
      };

      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
          $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{level}{must});
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        #
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
        #
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
        ## |style| is accepted only without the |scoped| attribute.
        $report_not_allowed->()
            if $child_el->has_attribute_ns (undef, 'scoped');
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
        ## |meta| is accepted only with an |http-equiv| attribute
        ## whose value is not |content-type|.
        my $http_equiv_attr
            = $child_el->get_attribute_node_ns (undef, 'http-equiv');
        ## TODO: case
        if (not $http_equiv_attr or
            lc ($http_equiv_attr->value) eq 'content-type') {
          $report_not_allowed->();
        }
      } else {
        $report_not_allowed->();
      }
    } else {
      $HTMLTransparentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($self->{flag}->{in_head}) {
      ## Significant text is rejected when the |in_head| flag is set.
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}{must})
          if $has_significant;
    } else {
      $HTMLTransparentChecker{check_child_text}->(@_);
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $base_checker = $self->{flag}->{in_head}
        ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
    $base_checker->{check_end}->(@_);
  },
};
7043 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
7044 wakaba 1.1
## Checker definition for the HTML |event-source| element (hyphenated
## name); status |FEATURE_HTML5_LC_DROPPED|.  Empty element.
$Element->{$HTML_NS}->{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_LC_DROPPED,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |src|, |template| and |ref| are all flagged as resource URIs.
    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw(src template ref);
  },
};
7062    
## Checker definition for the HTML |eventsource| element; status
## |FEATURE_HTML5_DROPPED|.  Empty element.
$Element->{$HTML_NS}->{eventsource} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_DROPPED,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |src|, |template| and |ref| are all flagged as resource URIs.
    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw(src template ref);
  },
};
7080    
## Checker definition for the HTML |details| element.  Every callback
## is copied from the |fieldset| definition; only |status| and
## |check_attrs| are replaced here.
$Element->{$HTML_NS}->{details} = {
  %{ $Element->{$HTML_NS}->{fieldset} },
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    { open => $GetHTMLBooleanAttrChecker->('open') },
    { %HTMLAttrStatus, open => FEATURE_HTML5_LC },
  ),
};
7091    
## Checker definition for the HTML |datagrid| element (flow content).
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |a| and nested |datagrid| are excluded while inside this
    ## element; the exclusion is undone in |check_end|.
    my %excluded = ($HTML_NS => {a => 1, datagrid => 1});
    $self->_add_minus_elements ($element_state, \%excluded);

    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw(template ref);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
7119    
## Checker definition for the HTML |command| element (empty element).
$Element->{$HTML_NS}->{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    ## |type| accepts the keywords |command|, |checkbox| and |radio|;
    ## anything else is reported as |invalid attribute value|.
    type => sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      ## Keywords are compared ASCII case-insensitively, in the same
      ## way as the other keyword comparisons in this file (e.g. the
      ## |wrap| check for |textarea|).
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                           level => $self->{level}->{must});
      }
    },
  }, {
    ## Attribute status table; entries below override |%HTMLAttrStatus|.
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_DROPPED, # HTML5 revision 3067
    disabled => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |icon| is flagged as an embedded URI; |template| and |ref| as
    ## resource URIs.
    $element_state->{uri_info}->{icon}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
7156    
## Checker definition for the HTML |bb| element; status
## |FEATURE_HTML5_DROPPED|.
$Element->{$HTML_NS}->{bb} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->(
    { type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1}) },
    { %HTMLAttrStatus, type => FEATURE_HTML5_DROPPED },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Everything listed in |$HTMLInteractiveContent| is excluded while
    ## inside this element; the exclusion is undone in |check_end|.
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw(template ref);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## NOTE(review): The base checker above is
    ## |%HTMLPhrasingContentChecker|, but the end-of-element check is
    ## delegated to |%HTMLTransparentChecker| - confirm that this
    ## mismatch is intended.
    $HTMLTransparentChecker{check_end}->(@_);
  },
};
7180    
## Checker definition for the HTML |menu| element.
##
## The content is either a run of |li| elements or phrasing content,
## never a mixture.  |$element_state->{phase}| tracks which kind has
## been seen: it starts as |li or phrasing| and is switched to |li| or
## |phrasing| by the first |li| child, phrasing child or significant
## text.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## ISSUE: <menu id=""><p contextmenu=""> match?  (In the current
    ## implementation, it does not match.)
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    ## Attribute status table; entries below override the two common
    ## tables.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_DROPPED,
    ## This key used to be misspelled |compat|, which left the
    ## |compact| attribute checked above without a status entry.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';

    ## Remember the outer |in_menu| flag so that |check_end| only
    ## clears the flag if this element was the one that set it.
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    $element_state->{id_type} = 'menu';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        ## |li| after phrasing content.
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        ## Phrasing content after |li|.
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      ## Significant text counts as phrasing content.
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
7273    
## Checker definition for the HTML |datatemplate| element.  Only
## |rule| children (plus anything in the current plus-element set) are
## accepted, and no significant text.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
             ($child_nsuri eq $HTML_NS and $child_ln eq 'rule')) {
      ## Allowed child; nothing to report.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate',
                         level => $self->{level}{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  },
  is_xml_root => 1,
};
7304    
## Checker definition for the HTML |rule| element.  Its children and
## text are not checked at all (both child callbacks are no-ops).
$Element->{$HTML_NS}->{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    condition => $HTMLSelectorsAttrChecker,
    mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
  }, {
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |nest| is added to the plus-element set for the duration of
    ## this element.
    my %extra_allowed = ($HTML_NS => {nest => 1});
    $self->_add_plus_elements ($element_state, \%extra_allowed);

    ## Remember the outer |in_rule| flag so that |check_end| only
    ## clears the flag if this element was the one that set it.
    $element_state->{in_rule_original} = $self->{flag}->{in_rule};
    $self->{flag}->{in_rule} = 1;

    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw(template ref);
  },
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_plus_elements ($element_state);
    unless ($element_state->{in_rule_original}) {
      delete $self->{flag}->{in_rule};
    }

    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
7340    
## Checker definition for the HTML |nest| element (empty element).
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    filter => $HTMLSelectorsAttrChecker,
    ## |mode| must be a non-empty string that contains none of the
    ## characters U+0009, U+000A, U+000C, U+000D and U+0020.
    mode => sub {
      my ($self, $attr) = @_;
      unless ($attr->value =~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/) {
        $self->{onerror}->(node => $attr, type => 'mode:syntax error',
                           level => $self->{level}{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
};
7360    
## Checker definition for the HTML |legend| element.  Children and
## text are checked with the flow-content callbacks when the parent
## state has |in_figure| set, and with the phrasing-content callbacks
## otherwise.  |check_start| / |check_end| always delegate to the
## flow-content callbacks.
$Element->{$HTML_NS}->{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
#    align => $GetHTMLEnumeratedAttrChecker->({
#      top => 1, bottom => 1, left => 1, right => 1,
#    }),
    form => $HTMLFormAttrChecker,
  }, {
    ## Attribute status table; entries below override the two common
    ## tables.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
    form => FEATURE_HTML5_DROPPED,
    lang => FEATURE_HTML5_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $content_checker = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker : \%HTMLPhrasingContentChecker;
    $content_checker->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    my $content_checker = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker : \%HTMLPhrasingContentChecker;
    $content_checker->{check_child_text}->(@_);
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |figure| is excluded while inside this element; the exclusion
    ## is undone in |check_end|.
    my %excluded = ($HTML_NS => {figure => 1});
    $self->_add_minus_elements ($element_state, \%excluded);

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # legend
7407 wakaba 1.1
## Checker definition for the HTML |div| element (flow content).
$Element->{$HTML_NS}->{div} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  }, {
    ## Attribute status table; entries below override the two common
    ## tables.
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |datasrc|, |template| and |ref| are all flagged as resource URIs.
    $element_state->{uri_info}{$_}{type}{resource} = 1
        for qw(datasrc template ref);
  },
};
7432    
## Checker definition for the (deprecated) HTML |center| element
## (flow content).  It has no element-specific attribute checkers.
$Element->{$HTML_NS}->{center} = {
  %HTMLFlowContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
};
7442    
## Checker definition for the |font| element in the HTML namespace.
## It is based on %HTMLTransparentChecker; the status combines the
## "dropped from HTML5" flag with the deprecated M12N 1.0 REC flag.
7443 wakaba 1.1 $Element->{$HTML_NS}->{font} = {
7444 wakaba 1.40 %HTMLTransparentChecker,
7445 wakaba 1.78 status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
## The first hash (attribute value checkers) is still empty, see the
## TODO below; the second hash lists attribute status flags, with the
## explicit entries overriding %HTMLAttrStatus.
7446 wakaba 1.70 check_attrs => $GetHTMLAttrsChecker->({
7447     ## TODO: HTML4 |size|, |color|, |face|
7448 wakaba 1.49 }, {
7449     %HTMLAttrStatus,
7450 wakaba 1.187 class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
7451 wakaba 1.49 color => FEATURE_M12N10_REC_DEPRECATED,
7452 wakaba 1.187 dir => FEATURE_HTML5_REC,
7453 wakaba 1.49 face => FEATURE_M12N10_REC_DEPRECATED,
7454 wakaba 1.187 id => FEATURE_HTML5_REC,
7455     lang => FEATURE_HTML5_REC,
7456 wakaba 1.49 size => FEATURE_M12N10_REC_DEPRECATED,
7457 wakaba 1.187 style => FEATURE_HTML5_REC,
7458     title => FEATURE_HTML5_REC,
7459 wakaba 1.49 }),
7460 wakaba 1.78 ## NOTE: When the |font| element was defined in the HTML5 specification,
7461     ## it was allowed only in a document with the WYSIWYG signature. The
7462     ## checker does not check whether there is the signature, since the
7463     ## signature is dropped, too, and has never been implemented. (In addition,
7464     ## for any |font| element an "element not defined" error is raised anyway,
7465     ## such that we don't have to raise an additional error.)
7466 wakaba 1.1 };
7467 wakaba 1.49
## Checker definition for the |basefont| element in the HTML namespace.
## It is based on %HTMLEmptyChecker and marked with the deprecated
## M12N 1.0 REC status.
7468 wakaba 1.64 $Element->{$HTML_NS}->{basefont} = {
7469     %HTMLEmptyChecker,
7470     status => FEATURE_M12N10_REC_DEPRECATED,
## No attribute value checkers are registered yet (see the TODO); only
## the attribute status flags in the second hash are declared.
7471     check_attrs => $GetHTMLAttrsChecker->({
7472     ## TODO: color, face, size
7473     }, {
7474     %HTMLAttrStatus,
7475     color => FEATURE_M12N10_REC_DEPRECATED,
7476     face => FEATURE_M12N10_REC_DEPRECATED,
7477 wakaba 1.187 id => FEATURE_HTML5_REC,
7478 wakaba 1.64 size => FEATURE_M12N10_REC_DEPRECATED,
7479     }),
7480 wakaba 1.188 }; # basefont
7481 wakaba 1.64
7482 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
7483 wakaba 1.188 ## class title id cols rows style(x10)
7484    
## Checker definition for the |frameset| element in the HTML namespace.
## The author marks this definition as provisional with XXX: it is
## based on %HTMLEmptyChecker and declares only event handler attributes.
7485     $Element->{$HTML_NS}->{frameset} = {
7486     %HTMLEmptyChecker, # XXX
7487     status => FEATURE_M12N10_REC,
## First hash: every listed |on*| attribute is validated by the shared
## $HTMLEventHandlerAttrChecker.
7488     check_attrs => $GetHTMLAttrsChecker->({
7489     ## XXX
7490     onafterprint => $HTMLEventHandlerAttrChecker,
7491     onbeforeprint => $HTMLEventHandlerAttrChecker,
7492     onbeforeunload => $HTMLEventHandlerAttrChecker,
7493     onblur => $HTMLEventHandlerAttrChecker,
7494     onerror => $HTMLEventHandlerAttrChecker,
7495     onfocus => $HTMLEventHandlerAttrChecker,
7496     onhashchange => $HTMLEventHandlerAttrChecker,
7497     onload => $HTMLEventHandlerAttrChecker,
7498     onmessage => $HTMLEventHandlerAttrChecker,
7499     onoffline => $HTMLEventHandlerAttrChecker,
7500     ononline => $HTMLEventHandlerAttrChecker,
7501     onpopstate => $HTMLEventHandlerAttrChecker,
7502     onredo => $HTMLEventHandlerAttrChecker,
7503     onresize => $HTMLEventHandlerAttrChecker,
7504     onstorage => $HTMLEventHandlerAttrChecker,
7505     onundo => $HTMLEventHandlerAttrChecker,
7506     onunload => $HTMLEventHandlerAttrChecker,
7507     }, {
## Second hash: attribute status flags.  Besides what %HTMLAttrStatus
## provides, only |onload| and |onunload| get an explicit status here.
7508     %HTMLAttrStatus,
7509     ## XXX
7510     onload => FEATURE_M12N10_REC,
7511     onunload => FEATURE_M12N10_REC,
7512     }),
7513     }; # frameset
7514    
7515 wakaba 1.49 ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7516     ## noframes Common, lang(xhtml10)
7517    
7518 wakaba 1.100 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7519 wakaba 1.56
7520 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7521     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7522     ## xmp, listing sdapref [HTML 2.0]
7523    
7524 wakaba 1.56 =pod
7525    
7526 wakaba 1.61 HTML 2.0 nextid @n
7527    
7528     RFC 2659: CERTS CRYPTOPTS
7529    
7530     ISO-HTML: pre-html, divN
7531 wakaba 1.82
7532     XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7533     di (Common), nl (Common), handler (Common, type), standby (Common),
7534     summary (Common)
7535    
7536 wakaba 1.97 Access & XHTML2: access (LC)
7537 wakaba 1.82
7538     XML Events & XForms (for XHTML2 support; very, very low priority)
7539 wakaba 1.61
7540 wakaba 1.56 =cut
7541 wakaba 1.61
7542     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7543     ## We added them only to |a|. |link| and |form| might also allow them
7544     ## in theory.
7545 wakaba 1.1
## Set the |loaded| flag of the HTML namespace entry in
## $Whatpm::ContentChecker::Namespace now that the definitions above
## have been registered.
7546     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
7547    
## A module file must end with a true value for |require| to succeed.
7548     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24