/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.170 - (hide annotations) (download)
Sun Jun 28 10:48:30 2009 UTC (16 years ago) by wakaba
Branch: MAIN
Changes since 1.169: +39 -1 lines
++ whatpm/t/ChangeLog	28 Jun 2009 10:48:13 -0000
2009-06-28  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.t: Added new test data file.

++ whatpm/t/dom-conformance/ChangeLog	28 Jun 2009 10:47:52 -0000
2009-06-28  Wakaba  <wakaba@suika.fam.cx>

	* html-scripting-1.dat: New file.

2009-06-28  Wakaba  <wakaba@suika.fam.cx>

	* html-flows-1.dat, html-tables-1.dat: Test data for flow content
	in |legend| and |caption| are added (c.f. HTML5 revision 3252).

++ whatpm/Whatpm/ContentChecker/ChangeLog	28 Jun 2009 10:46:58 -0000
2009-06-28  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: |ondataunavailable| has been removed (HTML5 revision
	3252).

2009-06-28  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Block-level contents in |figure|'s |legend| and
	|caption| are now allowed (HTML5 revision 3329).

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.117 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6    
7 wakaba 1.1 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8    
## Part of HTML5, annotated with the "implemented" status.
sub FEATURE_HTML5_COMPLETE () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}
## Part of HTML5, annotated with the "awaiting implementation
## feedback" status.
sub FEATURE_HTML5_CR () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}
## Part of HTML5, annotated with the "last call for comments" status.
sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}
## Part of HTML5, but annotated as "being considered for removal".
## Mapped to the working draft status.
sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}
## Part of HTML5, annotated with the "working draft" status.
sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}
## Part of HTML5, annotated with the "first draft" status.  Mapped to
## the working draft status.
sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}
## Part of HTML5, but carrying no status annotation.  Mapped to the
## working draft status.
sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
      | Whatpm::ContentChecker::FEATURE_ALLOWED
}
## Was part of HTML5 in a status earlier than the last call for
## comments, but has since been dropped (hence no "allowed" flag).
sub FEATURE_HTML5_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
## Was part of HTML5 in the last call for comments status, but has
## since been dropped (hence no "allowed" flag).
sub FEATURE_HTML5_LC_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
55 wakaba 1.154
## Defined in WF2 (deprecated or not) and later incorporated into the
## HTML5 specification.
sub FEATURE_WF2X () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
## Introduced or modified by WF2, but never merged into HTML5.
sub FEATURE_WF2 () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
## Mentioned only in WF2's informative appendix A and never merged
## into HTML5.
sub FEATURE_WF2_INFORMATIVE () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
71 wakaba 1.49
## RDFa feature, reported with the Recommendation status.
sub FEATURE_RDFA_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}
## Feature that was defined in an RDFa last call working draft, but
## was dropped afterwards.
sub FEATURE_RDFA_LC_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
80 wakaba 1.58
## NOTE: The XHTML Role LCWD says almost nothing about how the |role|
## attribute may be used.  Its only requirement in that area is "the
## attribute MUST be referenced using its namespace-qualified form"
## (and even that is a host language conformance requirement!).
sub FEATURE_ROLE_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
88    
## XHTML 2.0 Editor's Draft, the one whose namespace URI is
## "http://www.w3.org/1999/xhtml".
sub FEATURE_XHTML2_ED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
94 wakaba 1.58
## XHTML Basic 1.1 Recommendation: features that are new (i.e. not
## present in XHTML M12N).
sub FEATURE_XHTMLBASIC11_CR () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}
## XHTML Basic 1.1 Recommendation: features that are new but already
## deprecated.
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO
}
106    
## Status flag used for Ruby annotation features.
sub FEATURE_RUBY_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
110    
## XHTML M12N 1.1 Recommendation: features that are new (i.e. not
## present in M12N 1.0).
sub FEATURE_M12N11_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}
115    
## NOTE: The M12N10 status follows the abstract module definitions of
## XHTML m12n 1.0.  Those definitions have a number of problems, but
## the specification nevertheless passed the CR phase and is a REC.
sub FEATURE_M12N10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}
## Same Recommendation status as |FEATURE_M12N10_REC|, with the
## "deprecated (informational)" flag added.
sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO
}
126 wakaba 1.49
## NOTE: The XHTML10 status follows the transitional and frameset DTDs
## of XHTML 1.0 (second edition).  Only the attributes that are missing
## from the M12N10 abstract definitions are added under this status.
sub FEATURE_XHTML10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
133    
## NOTE: Differences from HTML4.  (Informative documentation.)
sub FEATURE_ISOHTML_PREPARATION () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
138 wakaba 1.58
## NOTE: The HTML4 status follows the transitional and frameset DTDs
## of HTML 4.01.  Only the attributes that are missing from XHTML10
## are added under this status.
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
144    
145     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
146     ## rather than presentational attributes (deprecated or not deprecated).
147 wakaba 1.48
## NOTE: Differences from HTML4.  The "deprecated" flag corresponds to
## a lowercase normative "should".
sub FEATURE_HTML32_REC_OBSOLETE () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD
}
154    
## RFC 2659 (an Experimental RFC).
sub FEATURE_RFC2659 () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
158    
## NOTE: HTML 2.x, i.e. differences from HTML 2.0 that do not appear
## in newer versions.  (Proposed Standard, obsolete.)
sub FEATURE_HTML2X_RFC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
163    
## NOTE: Differences from HTML 2.0.  (Experimental RFC, obsolete.)
sub FEATURE_RFC1942 () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
168    
## NOTE: Differences from HTML 3.2.  (Proposed Standard, obsolete.)
sub FEATURE_HTML20_RFC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
173 wakaba 1.58
174 wakaba 1.29 ## December 2007 HTML5 Classification
175    
## Metadata content: {namespace URI => {local name => 1}}.
my $HTMLMetadataContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw/title base link style script noscript/,
      qw/event-source eventsource command datatemplate/,
      ## NOTE: A |meta| element without a |name| attribute is not
      ## allowed as metadata content outside of the |head| element.
      qw/meta/,
    ),
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
189    
## Flow content: {namespace URI => {local name => 1}}.
my $HTMLFlowContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw/section nav article blockquote aside/,
      qw/h1 h2 h3 h4 h5 h6 header footer address p hr dialog pre/,
      qw/ol ul dl figure map table form fieldset/,
      ## ISSUE: |details| and |datagrid| are "flow element"s in the
      ## spec.
      qw/details datagrid/,
      qw/datatemplate/,
      ## ISSUE: |div| has no category in the spec.
      qw/div/,
      ## NOTE: |style| is only allowed if the |scoped| attribute is
      ## specified.  Additionally, it must come before any other
      ## element or non-inter-element-whitespace text node.
      qw/style/,

      qw/br q cite em strong small mark dfn abbr time progress meter/,
      qw/code var samp kbd sub sup span i b bdo ruby/,
      qw/script noscript event-source eventsource command bb/,
      qw/input button label select datalist textarea output/,
      ## ISSUE: |datagrid| (listed above) is also an "interactive
      ## element" in the spec.
      ## NOTE: |area| is allowed only as a descendant of |map|.
      qw/area/,

      ## NOTE: Transparent.
      qw/a ins del font/,

      ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise,
      ## flow.
      qw/menu/,

      qw/img iframe embed object video audio canvas/,
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
232    
## Sectioning content: {namespace URI => {local name => 1}}.
my $HTMLSectioningContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw/section nav article aside/,
      ## NOTE: |body| is only allowed in the |html| element.
      qw/body/,
    ),
  },
};
240    
## Sectioning roots: {namespace URI => {local name => 1}}.
my $HTMLSectioningRoot = {
  $HTML_NS => {map { ($_ => 1) } qw/blockquote datagrid figure td/},
};
246    
## Heading content: {namespace URI => {local name => 1}}.
my $HTMLHeadingContent = {
  $HTML_NS => {map { ($_ => 1) } qw/h1 h2 h3 h4 h5 h6 header/},
};
252    
## Phrasing content: {namespace URI => {local name => 1}}.
my $HTMLPhrasingContent = {
  ## NOTE: All phrasing content is also flow content.
  $HTML_NS => {
    map { ($_ => 1) } (
      qw/br q cite em strong small mark dfn abbr time progress meter/,
      qw/code var samp kbd sub sup span i b bdo ruby/,
      qw/script noscript event-source eventsource command bb/,
      qw/input button label select datalist textarea output/,
      ## ISSUE: |datagrid| is an "interactive element" in the spec.
      qw/datagrid/,
      ## NOTE: |area| is allowed only as a descendant of |map|.
      qw/area/,

      ## NOTE: Transparent.
      qw/a ins del font/,

      ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise,
      ## flow.
      qw/menu/,

      qw/img iframe embed object video audio canvas/,
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## NOTE: And non-inter-element-whitespace text nodes.
};
284    
285 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
286 wakaba 1.29
## Interactive content: {namespace URI => {local name => 1}}.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw/a label input button select textarea details datagrid bb/,

      ## NOTE: When the "controls" attribute is specified.
      qw/video audio/,

      ## NOTE: When the "type=toolbar" attribute is specified.
      qw/menu/,
    ),
  },
};
300    
## NOTE: Labelable form-associated elements, as
## {namespace URI => {local name => 1}}.
my $LabelableFAE = {
  $HTML_NS => {map { ($_ => 1) } qw/input button select textarea/},
};
307    
308 wakaba 1.130 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
309    
310 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
311     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
312    
313     ## -- Common attribute syntax checkers
314    
315 wakaba 1.1 our $AttrChecker;
316 wakaba 1.82 our $AttrStatus;
317 wakaba 1.1
## Returns a checker for an enumerated attribute.
##
## Argument: a hash reference {keyword => state}, where a positive
## state marks a conforming keyword and any other true state marks a
## known but non-conforming keyword.  The attribute value is
## lowercased before the lookup.
##
## The returned code reference takes ($self, $attr) and reports
## |enumerated:non-conforming| or |enumerated:invalid| through
## |$self->{onerror}| at the |must| level.
my $GetHTMLEnumeratedAttrChecker = sub {
  my ($state_of) = @_; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    my $keyword = lc $attr->value; ## TODO: ASCII case insensitivity?
    my $state = $state_of->{$keyword};
    unless ($state > 0) {
      $self->{onerror}->(node => $attr,
                         type => ($state ? 'enumerated:non-conforming'
                                         : 'enumerated:invalid'),
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLEnumeratedAttrChecker
334    
## Returns a checker for a boolean attribute.
##
## Argument: the local name of the attribute.  The returned code
## reference takes ($self, $attr); the lowercased value must be either
## empty or equal to that name, otherwise |boolean:invalid| is
## reported at the |must| level.
my $GetHTMLBooleanAttrChecker = sub {
  my ($attr_local_name) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $given = lc $attr->value; ## TODO: case
    if ($given ne $attr_local_name and $given ne '') {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLBooleanAttrChecker
346    
## Unordered set of unique space-separated tokens.
##
## Argument: an optional hash reference of allowed tokens; when it is
## not defined, any token is accepted.  The returned code reference
## takes ($self, $attr), reports every repeated token as |duplicate
## token| and every token missing from the allowed set as |word not
## allowed|, both at the |must| level.
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($allowed) = @_;
  return sub {
    my ($self, $attr) = @_;
    my @tokens = grep {length $_}
        split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
    my %seen;
    for my $token (@tokens) {
      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
        next;
      }
      $seen{$token} = 1;
      next if not defined $allowed or $allowed->{$token};
      $self->{onerror}->(node => $attr, type => 'word not allowed',
                         value => $token,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
373 wakaba 1.8
## |rel| attribute (set of space separated tokens, whose allowed
## values are defined by the section on link types).
##
## Arguments: ($a_or_area, $todo, $self, $attr, $item, $element_state).
## |$a_or_area| is used as the index into each link type's |effect|
## array and, when true, always marks |href| as a hyperlink.
##
## Side effects: reports errors through |$self->{onerror}|, records
## unique link types in |$self->{has_link_type}|, sets
## |$todo->{has_hyperlink_link_type}|, updates
## |$element_state->{uri_info}->{href}->{type}| and stores the set of
## tokens in |$element_state->{link_rel}|.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Collect the tokens.  A repeated token is an error, except for
  ## |up|, which may legitimately be repeated.
  my %word;
  for my $token (grep {length $_}
                 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
    }
  }
  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  my $is_hyperlink;
  my $is_resource;
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $token (keys %word) {
    my $def = $LinkType->{$token};
    unless (defined $def) {
      $self->{onerror}->(node => $attr,
                         type => 'unknown link type',
                         value => $token,
                         level => $self->{level}->{uncertain});
      next;
    }

    ## Effect of this link type in the current context, if any.
    my $effect = $def->{effect}->[$a_or_area];
    my $status = $def->{status};

    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr,
                           type => 'link type:proposed',
                           value => $token,
                           level => $self->{level}->{should});
      }
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context',
                           value => $token,
                           level => $self->{level}->{must});
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming',
                         value => $token,
                         level => $self->{level}->{must});
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$token}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate',
                           value => $token,
                           level => $self->{level}->{must});
      } else {
        $self->{has_link_type}->{$token} = 1;
      }
    }

    ## |alternate| is decided after the loop, since its effect depends
    ## on whether |stylesheet| is also specified.
    if (defined $effect and $token ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback
  ## link, which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than
  ## predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
489 wakaba 1.20
490     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
491 wakaba 1.1
## URI (or IRI)
##
## Arguments: ($self, $attr, $item, $element_state).
##
## Validates the attribute value as an IRI reference, forwarding any
## problems to |$self->{onerror}| with |node| set to the attribute.
## It also sets |$self->{has_uri_attr}|, stores the attribute node in
## |$element_state->{uri_info}->{<attribute name>}->{node}| and appends
## that |uri_info| entry to |$self->{return}->{uri}->{<value>}|.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an
  ## absolute reference with optional fragment identifier.)
  my $value = $attr->value;
  ## NOTE: The level table must be the third argument of
  ## |check_iri_reference|, as in the other callers in this file.
  ## It used to sit outside of the argument list, where it was
  ## silently discarded.
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
508    
## A space separated list of one or more URIs (or IRIs)
##
## Arguments: ($self, $attr).  Every item of the list is validated as
## an IRI reference and recorded in |$self->{return}->{uri}|, together
## with a type derived from the attribute name.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## How the referenced resources are used, by attribute name.
  my %type_for = (
    ping => 'action',
    profile => 'namespace',
    archive => 'resource',
  );
  my $type = $type_for{$attr->name};

  my @references = split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
  my $i = 0;
  for my $value (@references) {
    Whatpm::URIChecker->check_iri_reference ($value, sub {
      $self->{onerror}->(value => $value, @_, node => $attr, index => $i);
    }, $self->{level});

    ## TODO: absolute
    my $entry = {node => $attr, type => {$type => 1}};
    push @{$self->{return}->{uri}->{$value} ||= []}, $entry;

    $i++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a
  ## relative reference, i.e. same as base URI.)
  ## ISSUE: What is "space"?
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
537    
## Unanchored pattern for an e-mail address: two dot-atoms (runs of
## |atext| characters joined by single dots) separated by "@".
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/;
  qr/$dot_atom\@$dot_atom/;
};
544    
## Valid global date and time.
##
## Argument: the suffix of the |Message::Date| parser method to use
## (the method called is |parse_<type>|).
##
## The returned code reference takes ($self, $attr, $item,
## $element_state).  It stores the parse result in
## |$element_state->{date_value}->{<attribute name>}|; when the result
## is false, the stored value is the empty string if an error has been
## reported and |undef| otherwise.  Parser errors of type |date value
## not supported| are neither reported nor counted as errors.
my $GetDateTimeAttrChecker = sub ($) {
  my ($type) = @_;
  my $parse_method = 'parse_' . $type;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    require Message::Date;
    my $parser = Message::Date->new;
    $parser->{level} = $self->{level};

    ## Turns into the empty string once a real error is reported.
    my $error_marker;
    $parser->{onerror} = sub {
      my %opt = @_;
      if ($opt{type} ne 'date value not supported') {
        $self->{onerror}->(%opt, node => $attr);
        $error_marker = '';
      }
    };

    my $parsed = $parser->$parse_method ($attr->value);
    $element_state->{date_value}->{$attr->name} = $parsed || $error_marker;
  };
}; # $GetDateTimeAttrChecker
569 wakaba 1.1
## Integer: an optional "-" followed by one or more ASCII digits.
## Arguments: ($self, $attr).  Anything else is reported as
## |integer:syntax error| at the |must| level.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A-?[0-9]+\z/) {
    $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLIntegerAttrChecker
578    
## Returns a checker for a non-negative integer attribute.
##
## Argument: a code reference that receives the numeric value and
## returns true if it is within the allowed range.  The returned code
## reference takes ($self, $attr) and reports |nninteger:syntax error|
## or |nninteger:out of range| at the |must| level.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my ($is_in_range) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value !~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'nninteger:syntax error',
                         level => $self->{level}->{must});
    } elsif (not $is_in_range->($value + 0)) {
      $self->{onerror}->(node => $attr, type => 'nninteger:out of range',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
596    
## Returns a checker for a floating point number attribute.
##
## Argument: a code reference that receives the numeric value and
## returns true if it is within the allowed range.
##
## The returned code reference takes ($self, $attr, $item,
## $element_state).  An in-range number is stored in
## |$element_state->{number_value}->{<attribute name>}|; otherwise
## |float:syntax error| or |float:out of range| is reported at the
## |must| level.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my ($is_in_range) = @_;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    my $is_float = ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                    $value =~ /\A-?\.[0-9]+\z/);
    if (not $is_float) {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error',
                         level => $self->{level}->{must});
    } elsif ($is_in_range->($value + 0)) {
      ## TODO: parse algorithm
      $element_state->{number_value}->{$attr->name} = $value + 0;
    } else {
      $self->{onerror}->(node => $attr, type => 'float:out of range',
                         level => $self->{level}->{must});
    }
  };

  ## TODO: scientific notation
}; # $GetHTMLFloatingPointNumberAttrChecker
620    
## |step| attribute.  Arguments: ($self, $attr).
##
## NOTE: The value must be a valid floating point number greater than
## zero, or an ASCII case-insensitive match for "any".  Otherwise
## |float:out of range| or |float:syntax error| is reported at the
## |must| level.
my $StepAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /\A[Aa][Nn][Yy]\z/) {
    ## The keyword "any" is always acceptable.
  } elsif ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
           $value =~ /\A-?\.[0-9]+\z/) {
    if (not ($value > 0)) {
      $self->{onerror}->(node => $attr, type => 'float:out of range',
                         level => $self->{level}->{must});
    }
  } else {
    $self->{onerror}->(node => $attr,
                       type => 'float:syntax error',
                       level => $self->{level}->{must});
  }

  ## TODO: scientific
}; # $StepAttrChecker
643    
## HTML4 %Length;
##
## Arguments: ($self, $attr).  The value must consist of one or more
## ASCII digits, optionally followed by "%"; otherwise |length:syntax
## error| is reported at the |must| level.
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A[0-9]+%?\z/) {
    $self->{onerror}->(node => $attr, type => 'length:syntax error',
                       level => $self->{level}->{must});
  }

  ## NOTE: The HTML4 definition is too vague - it does not define the
  ## syntax of percentage values at all (!).
}; # $HTMLLengthAttrChecker
656    
## One or more characters allowed in a MIME token, as used by the
## media type checker for type, subtype and parameter tokens.
my $MIMEToken = qr{[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+};

## Type or subtype name, 1 to 127 characters long (RFC 4288).
my $TypeOrSubtype = qr{[A-Za-z0-9!#\$&.+^_-]{1,127}};

## Media type without parameters.  The type is captured as $1 and the
## subtype as $2.
my $IMTNoParameter = qr{($TypeOrSubtype)/($TypeOrSubtype)};
660    
661 wakaba 1.1 ## "A valid MIME type, optionally with parameters. [RFC 2046]"
662     ## ISSUE: RFC 2046 does not define syntax of media types.
663     ## ISSUE: The definition of "a valid MIME type" is unknown.
664     ## Syntactical correctness?
## Arguments: ($self, $attr).
##
## Splits the value into type, subtype and parameters, then hands the
## pieces to |Whatpm::IMTChecker|.  A value that does not match the
## expected syntax is reported as |IMT:syntax error| at the |must|
## level.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  if (my @part = $value =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#) {
    ## (type, subtype, name1 => value1, name2 => value2, ...)
    my @type = @part[0, 1];
    my $param = $part[2];
    while ($param =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
      my ($name, $token, $quoted) = ($1, $2, $3);
      if (defined $token) {
        push @type, $name => $token;
      } else {
        ## Resolve quoted-pairs, then strip the enclosing quotation
        ## marks.
        $quoted =~ s/\\(.)/$1/gs;
        push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
      }
    }
    require Whatpm::IMTChecker;
    my $ic = Whatpm::IMTChecker->new;
    $ic->{level} = $self->{level};
    $ic->check_imt (sub {
      $self->{onerror}->(@_, node => $attr);
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLIMTAttrChecker
697    
## Language tag.  Arguments: ($self, $attr).
##
## The value is checked with
## |Whatpm::LangTag->check_rfc3066_language_tag|; its errors are
## forwarded to |$self->{onerror}| with |node| set to the attribute.
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  require Whatpm::LangTag;
  my $report = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  Whatpm::LangTag->check_rfc3066_language_tag
      ($attr->value, $report, $self->{level});
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
711    
712     ## "A valid media query [MQ]"
## Arguments: ($self, $attr).  The value itself is not examined; the
## attribute is only reported as |media query| at the |uncertain|
## level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr,
                type => 'media query',
                level => $self->{level}->{uncertain});
  $self->{onerror}->(@report);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
720    
## Event handler content attribute.  Arguments: ($self, $attr).  The
## value itself is not examined; the attribute is only reported as
## |event handler| at the |uncertain| level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr,
                type => 'event handler',
                level => $self->{level}->{uncertain});
  $self->{onerror}->(@report);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
732    
## |form| attribute.  Arguments: ($self, $attr).
##
## NOTE: The value MUST be the ID of a |form| element.  Nothing is
## verified here; the reference is only queued in |$self->{idref}| as
## ['form', <value>, <attribute node>].
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;

  my $reference = ['form', $attr->value, $attr];
  push @{$self->{idref}}, $reference;

  ## ISSUE: <form id=""><input form=""> (empty ID)?
}; # $HTMLFormAttrChecker
743    
## |list| attribute.  Arguments: ($self, $attr).
##
## NOTE: The value MUST be the ID of a |datalist| element.  Nothing
## is verified here; the reference is only queued in |$self->{idref}|
## as ['datalist', <value>, <attribute node>].
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;

  my $reference = ['datalist', $attr->value, $attr];
  push @{$self->{idref}}, $reference;

  ## TODO: Warn about violations of control-dependent restrictions.
  ## For example, |<input type=url maxlength=10 list=a> <datalist
  ## id=a><option value=nonurlandtoolong></datalist>| should be
  ## warned.
}; # $ListAttrChecker
756    
## Checker for the |pattern| attribute.
##
## The value is not examined here.  It is passed to the |onsubdoc|
## callback as a character string of the media type
## |text/x-regexp-js|, with the attribute as its container node.
##
##   $self - The checker context (provides |onsubdoc|).
##   $attr - The attribute node being checked.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: "value must match the Pattern production of ECMA 262's
  ## grammar" - no additional constraints (e.g. {n,m} then n>=m).

  ## TODO: Warn if @value does not match @pattern.

  my %subdoc = (s => $attr->value,
                container_node => $attr,
                media_type => 'text/x-regexp-js',
                is_char_string => 1);
  $self->{onsubdoc}->(\%subdoc);
}; # $PatternAttrChecker
769    
## Checker for the |accept| attribute.
##
## The value is split at commas.  Each token must be one of the
## wildcard keywords |audio/*|, |video/*| and |image/*|, or match
## |$IMTNoParameter| (an Internet media type without parameters), in
## which case it is further checked by |Whatpm::IMTChecker|.  Tokens
## are compared ASCII case-insensitively and a repeated token is
## reported as |duplicate token|.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $AcceptAttrChecker = sub {
  my ($self, $attr) = @_;

  my $value = $attr->value;
  $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive

  ## An empty attribute value is treated as a single empty token.
  my @tokens = ('');
  @tokens = split /,/, $value, -1 if length $value;

  my %is_wildcard = map { ($_ => 1) } ('audio/*', 'video/*', 'image/*');
  my %seen;
  TOKEN: for my $token (@tokens) {
    if ($seen{$token}) {
      $self->{onerror}->(node => $attr,
                         type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
      next TOKEN;
    }
    $seen{$token} = 1;

    next TOKEN if $is_wildcard{$token};

    if ($token =~ m[\A$IMTNoParameter\z]) {
      ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
      ## define its own syntax citing RFC 4288.

      ## NOTE: Parameters not allowed.  |$1| and |$2| are the
      ## captures of the match above.
      require Whatpm::IMTChecker;
      my $imt_checker = Whatpm::IMTChecker->new;
      $imt_checker->{level} = $self->{level};
      $imt_checker->check_imt (sub {
        $self->{onerror}->(@_, node => $attr);
      }, $1, $2);
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'IMTnp:syntax error', ## TODOC: type
                         value => $token,
                         level => $self->{level}->{must});
    }
  } # TOKEN
}; # $AcceptAttrChecker
808    
## Checker for the |name| attribute of form controls.
##
## An empty value is reported as |empty control name| at the |must|
## level.  Any non-empty value is accepted.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $FormControlNameAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: No uniqueness constraint.
  my $control_name = $attr->value;
  if (not length $control_name) {
    $self->{onerror}->(node => $attr,
                       type => 'empty control name', ## TODOC: type
                       level => $self->{level}->{must});
  }
}; # $FormControlNameAttrChecker
820    
## Checker for the |autofocus| attribute.
##
## The attribute is first checked as a boolean attribute.  Then, if an
## |autofocus| attribute has already been seen during this check (as
## recorded in |$self->{has_autofocus}|), it is reported as
## |duplicate autofocus| at the |must| level.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $AutofocusAttrChecker = sub {
  my ($self, $attr) = @_;

  my $check_boolean = $GetHTMLBooleanAttrChecker->('autofocus');
  $check_boolean->(@_);

  $self->{onerror}->(node => $attr,
                     type => 'duplicate autofocus', ## TODOC: type
                     level => $self->{level}->{must})
      if $self->{has_autofocus};

  ## Remember that an |autofocus| attribute has been seen.
  $self->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
833    
## Checker for the |usemap| attribute.
##
## The value MUST be a valid hash-name reference to a |map| element.
## If the value begins with |#|, the rest of the value is recorded in
## |$self->{usemap}| as |[$name, $attr]|.  Otherwise a
## |hashref:syntax error| is reported at the |must| level.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;

  my $reference = $attr->value;
  if (index ($reference, '#') == 0) {
    ## NOTE: |usemap="#"| is conforming, though it identifies no |map|
    ## element according to the "rules for parsing a hash-name
    ## reference" algorithm.  The document is non-conforming anyway,
    ## since a |map| element with an empty |name| is non-conforming.
    my $map_name = substr $reference, 1;
    push @{$self->{usemap}}, [$map_name, $attr];
  } else {
    $self->{onerror}->(node => $attr, type => 'hashref:syntax error',
                       level => $self->{level}->{must});
  }

  ## NOTE: Space characters in hash-name references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported
}; # $HTMLUsemapAttrChecker
851    
## Checker for attributes whose value is a valid browsing context
## name.
##
## An empty value is reported as |window name:empty| and a value
## beginning with |_| is reported as |window name:reserved| (with the
## offending value), both at the |must| level.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($self, $attr) = @_;

  my $name = $attr->value;
  if (not length $name) {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  } elsif (substr ($name, 0, 1) eq '_') {
    $self->{onerror}->(node => $attr, type => 'window name:reserved',
                       level => $self->{level}->{must},
                       value => $name);
  }
}; # $HTMLBrowsingContextNameAttrChecker
867    
## Checker for attributes whose value is a valid browsing context name
## or keyword (e.g. |target|).
##
## A value beginning with |_| must be an ASCII case-insensitive match
## for one of the keywords |_blank|, |_self|, |_parent| and |_top|;
## otherwise it is reported as |window name:reserved| (with the
## ASCII-lowercased value).  An empty value is reported as
## |window name:empty|.  Both errors are at the |must| level.  Any
## other non-empty value is accepted.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    ## Only ASCII letters are folded.  |lc| would also map non-ASCII
    ## characters such as U+212A KELVIN SIGN to ASCII letters and
    ## therefore accept values that are not keywords.
    $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
    unless ({
      _blank => 1, _self => 1, _parent => 1, _top => 1,
    }->{$value}) {
      $self->{onerror}->(node => $attr,
                         type => 'window name:reserved',
                         level => $self->{level}->{must},
                         value => $value);
    }
  } elsif (length $value) {
    ## A valid browsing context name.
  } else {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  }
}; # $HTMLTargetAttrChecker
889    
## Checker for attributes whose value is a group of selectors.
##
## The value is parsed by a |Whatpm::CSS::SelectorsParser| object
## configured with the supported pseudo-classes and pseudo-elements
## listed below.  Errors raised by the parser are forwarded to
## |onerror| with the attribute as the |node|.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  my $selectors = $attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  for my $pseudo_class (qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /) {
    $parser->{pseudo_class}->{$pseudo_class} = 1;
  }

  for my $pseudo_element (qw/
    after before first-letter first-line
  /) {
    $parser->{pseudo_element}->{$pseudo_element} = 1;
  }

  $parser->{level} = $self->{level};
  $parser->{onerror} = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
917    
## Checker for the |accesskey| attribute as defined by HTML4
## ("character" or |%Character;|).
##
## A value that is not exactly one character long is reported as
## |char:syntax error| at the |html4_fact| level.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $HTMLAccesskeyAttrChecker = sub {
  my ($self, $attr) = @_;

  my $key = $attr->value;
  unless (length ($key) == 1) {
    $self->{onerror}->(node => $attr, type => 'char:syntax error',
                       level => $self->{level}->{html4_fact});
  }

  ## NOTE: "Note.  Authors should consider the input method of the
  ## expected reader when specifying an accesskey."  [HTML4]  This is
  ## hard to implement, since it depends on keyboard and so on.
  ## NOTE: "We recommend that authors include the access key in label
  ## text or wherever the access key is to apply."  [HTML4]
  ## (informative)
}; # $HTMLAccesskeyAttrChecker
935    
## Checks a character encoding name.
##
## NOTE: This code is used for |charset=""| attributes, |charset=|
## portion of the |content=""| attributes, and |accept-charset=""|
## attributes.
##
##   $charset_value - The encoding name to check.
##   $self          - The checker context (provides |onerror| and
##                    |level|).
##   $attr          - The attribute node used as the |node| of errors.
##   $ascii_compat  - If true, an encoding known to
##                    |Message::Charset::Info| must also be in the
##                    ASCII-compatible category.
##
## Returns a list of the |$Message::Charset::Info::IANACharset| entry
## for the name (a false value if there is none) and the
## ASCII-lowercased name.
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($charset_value, $self, $attr, $ascii_compat) = @_;

  ## All errors of this checker share the same argument layout.
  my $report = sub {
    my ($type, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $charset_value,
                       level => $self->{level}->{$level_name});
  };

  ## NOTE: Though the case-sensitivity of |charset| attribute value
  ## is not explicitly spelled in the HTML5 spec, the Character Set
  ## registry of IANA, which is referenced from HTML5 spec, says that
  ## charset name is case-insensitive.
  $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

  require Message::Charset::Info;
  my $charset = $Message::Charset::Info::IANACharset->{$charset_value};

  ## ISSUE: What is "valid character encoding name"?  Syntactically
  ## valid?  Syntactically valid and registered?  What about x-charset
  ## names?
  $report->('charset:syntax error', 'must')
      unless Message::Charset::Info::is_syntactically_valid_iana_charset_name
                 ($charset_value);

  ## Error type for a name that is not a registered name.
  my $unregistered_type = $charset_value =~ /^x-/
      ? 'charset:private' : 'charset:not registered';

  if ($charset) {
    ## ISSUE: What is "the preferred name for that encoding" (for a
    ## charset with no "preferred MIME name" label)?
    my $name_flags = $charset->{iana_names}->{$charset_value} || 0;

    my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
    unless (($name_flags & $preferred) == $preferred) {
      $report->('charset:not preferred', 'must');
    }

    my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();
    unless (($name_flags & $registered) == $registered) {
      $report->($unregistered_type, 'good');
    }

    if ($ascii_compat and
        not ($charset->{category} &
             Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ())) {
      $report->('charset:not ascii compat', 'must');
    }
  } else {
    ## TODO: non-preferred-name error for these cases.

    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    $report->($unregistered_type, 'good');
  }

  return ($charset, $charset_value);
}; # $HTMLCharsetChecker
1024    
## Checker for attributes whose value is a list of character encoding
## names.
##
## NOTE: "An ordered set of space-separated tokens" where "each token
## MUST be the preferred name of an ASCII-compatible character
## encoding".
##
## The value is split at runs of space characters and every non-empty
## token is passed to |$HTMLCharsetChecker| with the ASCII-compatible
## requirement enabled.
##
##   $self - The checker context.
##   $attr - The attribute node being checked.
my $HTMLCharsetsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: "ordered set of space-separated tokens" is not defined.
  ## ISSUE: Uniqueness is not enforced.
  ## ISSUE: Shift_JIS is ASCII-compatible?  What about ISO-2022-JP?

  for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    next unless length $token;
    $HTMLCharsetChecker->($token, $self, $attr, 1);
  }
}; # $HTMLCharsetsAttrChecker
1043    
## Checker for HTML4 "color" (|%Color;|) attributes.
##
## The value must be |#| followed by one or more hexadecimal digits,
## or one of the sixteen color names, in any letter case.  Anything
## else is reported as |color:syntax error| at the |html4_fact|
## level.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $HTMLColorAttrChecker = sub {
  my ($self, $attr) = @_;

  my $color = $attr->value;

  unless ($color =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i) {
    $self->{onerror}->(node => $attr, type => 'color:syntax error',
                       level => $self->{level}->{html4_fact});
  }

  ## TODO: HTML4 has some guideline on usage of color.
}; # $HTMLColorAttrChecker
1058    
## Checker shared by the |ref| and |template| attributes.
##
## The value is first checked by |$HTMLURIAttrChecker|.  A |ref|
## attribute on an element without a |template| attribute is reported
## as |attribute not allowed|.  When the value, resolved against the
## document URL, refers to the document itself:
##
##   - a fragment identifier, if any, is recorded in
##     |$self->{ref}| or |$self->{template}| as |[$fragid, $attr]|;
##   - a |template| attribute without fragment identifier requires
##     the document element to be an HTML |datatemplate| element,
##     otherwise |template:not template| is reported.
##
## References to other documents are not checked.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($self, $attr) = @_;
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $attr->name;

  if ($attr_name eq 'ref' and
      not $attr->owner_element->has_attribute_ns (undef, 'template')) {
    $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                       level => $self->{level}->{must});
  }

  require Message::URL;
  my $doc = $attr->owner_document;
  my $doc_uri = $doc->document_uri;
  my $uri = Message::URL->new_abs ($attr->value, $doc_uri);
  my $no_frag_uri = $uri->clone;
  $no_frag_uri->uri_fragment (undef);

  ## Does the URL, ignoring its fragment identifier, identify the
  ## document being checked?
  my $is_same_document = defined $doc_uri
      ? $doc_uri eq $no_frag_uri
      : $no_frag_uri eq '';

  if ($is_same_document) {
    my $fragid = $uri->uri_fragment;
    if (defined $fragid) {
      push @{$self->{$attr_name}}, [$fragid => $attr];
    } elsif ($attr_name eq 'template') {
      ## The document as a whole is referenced as the template.
      my $root = $doc->document_element;
      my $root_is_datatemplate = 0;
      if ($root) {
        my $root_ns = $root->namespace_uri;
        $root_is_datatemplate = 1
            if defined $root_ns and $root_ns eq $HTML_NS and
               $root->manakai_local_name eq 'datatemplate';
      }

      unless ($root_is_datatemplate) {
        $self->{onerror}->(node => $attr, type => 'template:not template',
                           level => $self->{level}->{must});
      }
    }
  } else {
    ## TODO: An external document is referenced.
    ## The document MUST be an HTML or XML document.
    ## If there is a fragment identifier, it MUST point a part of the
    ## doc.  If the attribute is |template|, the pointed part MUST be
    ## a |datatemplate| element.
    ## If no fragment identifier is specified, the root element MUST
    ## be a |datatemplate| element when the attribute is |template|.
  }
}; # $HTMLRefOrTemplateAttrChecker
1111    
## Checker for the |repeat-min|, |repeat-max| and |repeat-start|
## attributes.
##
## This checker is registered both for the attributes in no namespace
## and for the attributes in the HTML namespace.  A namespaced
## attribute whose owner element is itself in the HTML namespace is
## reported as |attribute not allowed| at the |must| level.  The value
## is then checked as a non-negative integer.
##
##   $self - The checker context (provides |onerror| and |level|).
##   $attr - The attribute node being checked.
my $HTMLRepeatIndexAttrChecker = sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## Both conditions have to hold.  With |or| any owner element in
    ## some namespace was rejected, and an owner element in no
    ## namespace caused a comparison with |undef|.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  ## Any non-negative integer is acceptable.
  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1126    
## Checkers for the global attributes of HTML elements, keyed by the
## local name of the attribute.  Each checker is a code reference
## that is invoked as |$checker->($self, $attr, $item, $element_state)|
## and reports problems through |$self->{onerror}|.
my $HTMLAttrChecker = {
  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]

  ## |id|: Must be non-empty, unique in the document and free of
  ## space characters.  The attribute nodes are collected in
  ## |$self->{id}| by value; the ID type of the first occurrence is
  ## taken from |$element_state->{id_type}|.
  id => sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    if (length $value > 0) {
      if ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID',
                           level => $self->{level}->{must});
        push @{$self->{id}->{$value}}, $attr;
      } else {
        $self->{id}->{$value} = [$attr];
        $self->{id_type}->{$value} = $element_state->{id_type} || '';
      }
      if ($value =~ /[\x09\x0A\x0C\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID',
                           level => $self->{level}->{must});
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value',
                         level => $self->{level}->{must});
    }
  },

  title => sub {}, ## NOTE: No conformance criteria

  ## |lang|: The empty string, or a language tag checked by
  ## |Whatpm::LangTag|.
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value eq '') {
      ## The empty string is allowed.
    } else {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        $self->{onerror}->(@_, node => $attr);
      }, $self->{level});
    }
    ## ISSUE: RFC 4646 (3066bis)?

    ## TODO: test data

    ## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
    ## non-conforming.  Such errors are detected by the checkers of
    ## |{}xml:lang| and |{xml}:lang| attributes.
  },

  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),

  ## |class|: Each token is recorded in |$self->{return}->{class}|; a
  ## token repeated within the same attribute is reported as
  ## |duplicate token|.
  class => sub {
    my ($self, $attr) = @_;

    ## NOTE: "Unordered set of unique space-separated tokens".

    my %word;
    for my $word (grep {length $_}
                  split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      unless ($word{$word}) {
        $word{$word} = 1;
        push @{$self->{return}->{class}->{$word}||=[]}, $attr;
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $word,
                           level => $self->{level}->{must});
      }
    }
  },

  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),

  ## |contextmenu|: The reference to a |menu| element is recorded in
  ## |$self->{idref}|.
  contextmenu => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    push @{$self->{idref}}, ['menu', $value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },

  hidden => $GetHTMLBooleanAttrChecker->('hidden'),
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ref => $HTMLRefOrTemplateAttrChecker,

  ## |registrationmark|: Only allowed while the |in_rule| flag is
  ## set, and then not on an HTML |nest| element or on an HTML |rule|
  ## element for which |in_rule_original| is not set.
  registrationmark => sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## NOTE: Any value is conforming.

    if ($self->{flag}->{in_rule}) {
      my $el = $attr->owner_element;
      my $ln = $el->manakai_local_name;
      if ($ln eq 'nest' or
          ($ln eq 'rule' and not $element_state->{in_rule_original})) {
        my $nsuri = $el->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    } else {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## |repeat|: Either the keyword |template| or an integer.  The
  ## attribute in the HTML namespace is not allowed on elements that
  ## are themselves in the HTML namespace.
  repeat => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## Both conditions have to hold.  With |or| any owner element in
      ## some namespace was rejected, and an owner element in no
      ## namespace caused a comparison with |undef|.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    my $value = $attr->value;
    if ($value eq 'template') {
      ## The keyword.
    } elsif ($value =~ /\A-?[0-9]+\z/) {
      ## An integer.
    } else {
      $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "Repetition templates may occur anywhere."  Does that mean
    ## that the attribute MAY be specified to any element, or that the
    ## element with that attribute (i.e. a repetition template) can be
    ## inserted anywhere in a document tree?
  },

  'repeat-min' => $HTMLRepeatIndexAttrChecker,
  'repeat-max' => $HTMLRepeatIndexAttrChecker,
  'repeat-start' => $HTMLRepeatIndexAttrChecker,

  ## |repeat-template|: Any value is accepted.  The attribute in the
  ## HTML namespace is not allowed on elements that are themselves in
  ## the HTML namespace.
  'repeat-template' => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## See the |repeat| checker for the reason of |and|.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## ISSUE: This attribute has no conformance requirement.
    ## ISSUE: Repetition blocks MAY have this attribute.  Then, is the
    ## attribute allowed on an element that is not a repetition block?
  },

  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]

  ## |style|: The value is handed to |onsubdoc| as a character string
  ## of the media type |text/x-css-inline|.
  style => sub {
    my ($self, $attr) = @_;

    $self->{onsubdoc}->({s => $attr->value,
                         container_node => $attr,
                         media_type => 'text/x-css-inline',
                         is_char_string => 1});

    ## NOTE: "... MUST still be comprehensible and usable if those
    ## attributes were removed" is a semantic requirement, it cannot
    ## be tested.
  },

  tabindex => $HTMLIntegerAttrChecker,
  template => $HTMLRefOrTemplateAttrChecker,

  ## |xml:lang| in no namespace: Reported as informational in an HTML
  ## document and at the |html5_no_may| level otherwise.  It is only
  ## allowed together with a |lang| attribute in no namespace that
  ## has the same value, compared ASCII case-insensitively.
  'xml:lang' => sub {
    my ($self, $attr) = @_;

    if ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(type => 'in HTML:xml:lang',
                         level => $self->{level}->{info},
                         node => $attr);
      ## NOTE: This is not an error, but the attribute will be ignored.
    } else {
      $self->{onerror}->(type => 'in XML:xml:lang',
                         level => $self->{level}->{html5_no_may},
                         node => $attr);
      ## TODO: We need to add test for this error.
    }

    my $lang_attr = $attr->owner_element->get_attribute_node_ns
        (undef, 'lang');
    if ($lang_attr) {
      my $lang_attr_value = $lang_attr->value;
      $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      if ($lang_attr_value ne $value) {
        $self->{onerror}->(type => 'xml:lang ne lang',
                           level => $self->{level}->{must},
                           node => $attr);
      }
    } else {
      $self->{onerror}->(type => 'xml:lang not allowed',
                         level => $self->{level}->{must},
                         node => $attr);
      ## TODO: We need to add test for an element with both
      ## |{xml}:lang| and |{}xml:lang| attributes.
    }
  },

  ## |xmlns|: The value must be the HTML namespace URL and the
  ## attribute is reported when the document is not an HTML
  ## document.  The value is recorded in |$self->{return}->{uri}| as
  ## a namespace URL.
  xmlns => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                         level => $self->{level}->{must});
      ## TODO: Should be new "bad namespace" error?
    }
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                         level => $self->{level}->{must});
      ## TODO: Test
    }

    ## TODO: Should be resolved?
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {namespace => 1}};
  },
};
1340    
1341 wakaba 1.79 ## ISSUE: Shouldn't the same-origin policy be applied to the datatemplate feature?
1342    
## Standardization status of the HTML5 global attributes, keyed by
## attribute name and grouped by status.
my %HTMLAttrStatus = (
  (map { ($_ => FEATURE_HTML5_WD) }
   qw/class contextmenu dir id lang style title xmlns/),

  (map { ($_ => FEATURE_HTML5_DEFAULT) }
   qw/contenteditable hidden tabindex/),

  draggable => FEATURE_HTML5_LC,
  irrelevant => FEATURE_HTML5_DROPPED,

  (map { ($_ => FEATURE_HTML5_AT_RISK) }
   qw/ref registrationmark template/),

  ## Web Forms 2.0 repetition model
  (map { ($_ => FEATURE_WF2) }
   qw/repeat repeat-max repeat-min repeat-start repeat-template/),

  role => 0,
);
1367    
## Standardization status of the attributes that are common to the
## elements defined by both HTML5 and XHTML Modularization 1.0,
## including the RDFa attributes, keyed by attribute name.
my %HTMLM12NCommonAttrStatus = (
  ## RDFa
  (map { ($_ => FEATURE_RDFA_REC) }
   qw/about content datatype href property rel resource rev typeof/),
  instanceof => FEATURE_RDFA_LC_DROPPED,

  ## Core and I18N
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_M12N10_REC) }
   qw/class dir id title/),

  ## Intrinsic events
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
   qw/onclick ondblclick onmousedown onmouseup onmouseover onmousemove
      onmouseout onkeypress onkeydown onkeyup/),

  ## Style
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_M12N10_REC,
  style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
      FEATURE_M12N10_REC,
);
1398    
## Standardization status of the XHTML 2 common attributes, keyed by
## attribute name.  The |xml:id|, |xml:base| and |xml:lang|
## attributes are not listed here.
my %XHTML2CommonAttrStatus = (
  ## Attributes that are also part of HTML5 (from the Core,
  ## Bi-directional and Style collections).  The |style| attribute is
  ## "strongly discouraged".
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_XHTML2_ED) }
   qw/class id title dir style/),

  ## Attributes with the XHTML 2 status only.
  (map { ($_ => FEATURE_XHTML2_ED) } (
    ## Core
    qw/layout/,
    ## Hypertext
    qw/cite href hreflang hrefmedia hreftype nextfocus prevfocus target/,
    ## Edit
    qw/edit datetime/,
    ## Embedding
    qw/encoding src srctype/,
    ## Image Map
    qw/usemap ismap shape coords/,
    ## Media
    qw/media/,
    ## Metadata
    qw/about content datatype instanceof property rel resource rev/,
    ## Role
    qw/role/,
  )),
);
1458    
## Standardization status of the attributes that are common to the
## elements defined by HTML5, XHTML Modularization 1.0 and XHTML 2.
##
## The table starts from the union of |%HTMLM12NCommonAttrStatus| and
## |%XHTML2CommonAttrStatus|; the entries listed afterwards replace
## those of the attributes that appear in the source tables.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  ## RDFa
  (map { ($_ => FEATURE_RDFA_REC | FEATURE_XHTML2_ED) }
   qw/about content datatype property rel resource rev/),
  (map { ($_ => FEATURE_RDFA_REC) } qw/href typeof/),
  instanceof => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,

  ## Core and I18N
  (map { ($_ => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
   qw/class dir id title/),

  ## Style
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
      FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
);
1482    
## Event handler content attributes that are global attributes in
## HTML5.  They all share |$HTMLEventHandlerAttrChecker|.
for my $handler_name (qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onstorage onsubmit onunload
/) {
  $HTMLAttrChecker->{$handler_name} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$handler_name} = FEATURE_HTML5_DEFAULT;
}

## Event handler content attributes that have been dropped from
## HTML5.  They are still checked as event handlers.
for my $handler_name (qw/
  ondataunavailable
/) {
  $HTMLAttrChecker->{$handler_name} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$handler_name} = FEATURE_HTML5_DROPPED;
}
1501    
## Registration of the checkers and statuses of the global attributes
## in the HTML namespace.  The bare block only limits the scope of
## the two shorthand variables.
{
  my $html_checker = ($AttrChecker->{$HTML_NS} ||= {});
  my $html_status = ($AttrStatus->{$HTML_NS} ||= {});

  ## NOTE: Non-standard global attributes in the HTML namespace.
  $html_checker->{''} = sub {}; # no syntactical checks
  $html_status->{''} = 0; # disallowed and not part of any standard

  $html_status->{active} = FEATURE_HTML5_DROPPED;

  ## Web Forms 2.0 repetition model
  for my $name (qw/repeat repeat-max repeat-min repeat-start
                   repeat-template/) {
    $html_checker->{$name} = $HTMLAttrChecker->{$name};
    $html_status->{$name} = FEATURE_WF2;
  }

  ## RDFa
  for my $name (qw/about content datatype property rel resource rev/) {
    $html_status->{$name} = FEATURE_RDFA_REC | FEATURE_XHTML2_ED;
  }
  $html_status->{instanceof} = FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED;
  $html_status->{typeof} = FEATURE_RDFA_REC;

  $html_status->{role} = FEATURE_ROLE_LC;

  ## XHTML 2
  for my $name (qw/cite coords datetime edit encoding href hreflang
                   hrefmedia hreftype ismap layout media nextfocus
                   prevfocus shape src srctype style target usemap/) {
    $html_status->{$name} = FEATURE_XHTML2_ED;
  }

  ## XHTML Modularization 1.1 and XHTML 2
  for my $name (qw/class dir id title/) {
    $html_status->{$name} = FEATURE_M12N11_LC | FEATURE_XHTML2_ED;
  }

  ## XHTML Modularization 1.1 intrinsic events
  for my $name (qw/onclick ondblclick onmousedown onmouseup onmouseover
                   onmousemove onmouseout onkeypress onkeydown onkeyup/) {
    $html_status->{$name} = FEATURE_M12N11_LC;
  }
}
1530    
## Checker for the |data-*| attributes.  Nothing is checked.
##
## NOTE: "Authors should ... when the attributes are ignored and
## any associated CSS dropped, the page is still usable."  (semantic
## constraint.)
my $HTMLDatasetAttrChecker = sub {
  return;
}; # $HTMLDatasetAttrChecker

## Standardization status shared by all |data-*| attributes.
my $HTMLDatasetAttrStatus = FEATURE_HTML5_WD;
1538 wakaba 1.73
## Factory for |check_attrs| callbacks of HTML elements.
##
## Arguments:
##   $element_specific_checker - hash reference mapping an attribute local
##       name to a checker code reference; consulted before the global
##       |$HTMLAttrChecker| table.
##   $element_specific_status  - hash reference mapping an attribute local
##       name to the status value of that attribute on the element.
##
## Returns a code reference invoked as ($self, $item, $element_state).
## For every attribute of |$item->{node}| it selects a checker and a
## status, runs the checker (or reports an 'unknown attribute' error) and
## finally hands the status to |$self->_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;

  return sub {
    my ($self, $item, $element_state) = @_;

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      my ($checker, $status);
      if ($attr_ns eq '') {
        ## A |data-*| attribute name must not contain uppercase ASCII
        ## letters.
        my $is_dataset_attr
            = ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
               $attr_ln !~ /[A-Z]/);
        if ($is_dataset_attr) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $element_specific_checker->{$attr_ln}
              || $HTMLAttrChecker->{$attr_ln};
          $status = $element_specific_status->{$attr_ln};
        }
      }

      ## Fall back to the per-namespace tables: first the entry for the
      ## local name, then the namespace-wide entry (empty local name).
      unless ($checker) {
        $checker = $AttrChecker->{$attr_ns}->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{''};
      }
      unless ($status) {
        $status = $AttrStatus->{$attr_ns}->{$attr_ln}
            || $AttrStatus->{$attr_ns}->{''};
      }
      if (length ($attr_ns) and not defined $status) {
        $status = FEATURE_ALLOWED;
      }

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($attr_ns ne '' or $element_specific_status->{$attr_ln}) {
        ## Either a namespaced attribute without any checker, or a
        ## null-namespace attribute that has a status but no checker.
        ## (A null-namespace attribute with neither is not reported here.)
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1580    
## Base set of callbacks shared by the HTML element definitions: the
## generic |AnyChecker| callbacks, with |check_start| and |check_attrs|
## replaced by HTML-specific versions.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Mark the |template| and |ref| attributes as referring to a
    ## resource in the element's URI information.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  ## No element-specific attribute checkers; only the common statuses.
  check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
);
1591    
## Callbacks for elements whose content model is empty: any child element
## (unless listed in |plus_elements|) and any significant text is an error.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## An element listed in |minus_elements| is rejected only when
    ## |$IsInHTMLInteractiveContent| also returns true for it.
    my $excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));

    if ($excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to report.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:empty',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{level}->{must})
        if $has_significant;
  },
);
1619    
## Callbacks for elements whose content model is text only: child
## elements are errors unless they are listed in |plus_elements|.  Text
## handling is inherited from |%HTMLChecker|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## An element listed in |minus_elements| is rejected only when
    ## |$IsInHTMLInteractiveContent| also returns true for it.
    my $excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));

    if ($excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to report.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed:text',
                         level => $self->{level}->{must});
    }
  },
);
1638    
## Callbacks for elements whose content model is flow content.
##
## |$element_state->{has_non_style}| is set once anything other than a
## leading |style| element has been seen; a |style| child is accepted
## only while that flag is unset and only if it has a |scoped| attribute.
my %HTMLFlowContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## An element listed in |minus_elements| is rejected only when
    ## |$IsInHTMLInteractiveContent| also returns true for it.
    my $excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));

    if ($excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to report.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## Allowed only before any other content and only when scoped.
      unless (not $element_state->{has_non_style} and
              $child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
      ## A transparent child does not count as non-|style| content.
      if (not $child_is_transparent) {
        $element_state->{has_non_style} = 1;
      }
    } else {
      $element_state->{has_non_style} = 1;
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:flow',
                         level => $self->{level}->{must})
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Significant text ends the run of leading |style| elements.
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: A modified copy of the code below is in |datagrid| checker.
    if ($element_state->{has_significant}) {
      ## Propagate the "has significant content" state to the parent.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{transparent}) {
      ## Transparent elements are not required to have content here.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
);
1687    
## Callbacks for elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## An element listed in |minus_elements| is rejected only when
    ## |$IsInHTMLInteractiveContent| also returns true for it.
    my $excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));

    if ($excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to report.
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      ## Phrasing content; nothing to report.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:phrasing',
                         level => $self->{level}->{must});
    }
  },
  ## The end-of-element check is shared with the flow content checker.
  check_end => $HTMLFlowContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between flow and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
1713    
## Transparent elements currently use a plain copy of the flow content
## callbacks.
## ISSUE: Should the significant content rule be applied to a transparent
## element that has a parent?
my %HTMLTransparentChecker = (%HTMLFlowContentChecker);
1717 wakaba 1.40
## Package variables holding the element definition tables.
our ($Element, $ElementDefault);

## Entry with the empty local name: definition used for HTML-namespace
## elements that have no entry of their own (presumably; the lookup code
## is not part of this section).
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
1724    
## Definition of the |html| element.
##
## The content model is tracked with |$element_state->{phase}|, which
## moves through 'before head' -> 'after head' -> 'after body' as the
## |head| and |body| children are encountered.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    version => sub {
      ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
      ## Though DTDs of various versions of HTML define the attribute
      ## as |#FIXED|, this conformance checker does no check for
      ## the attribute value, since what kind of check should be done
      ## is unknown.
    },
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    manifest => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';

    ## Mark the URI-valued attributes as referring to a resource.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{manifest}->{type}->{resource} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $phase = $element_state->{phase};

    ## An element listed in |minus_elements| is rejected only when
    ## |$IsInHTMLInteractiveContent| also returns true for it.
    my $excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));

    if ($excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to report.
    } elsif ($phase eq 'before head') {
      ## Local name of the child if it is an HTML element, '' otherwise.
      my $html_child = $child_nsuri eq $HTML_NS ? $child_ln : '';
      if ($html_child eq 'head') {
        $element_state->{phase} = 'after head';
      } elsif ($html_child eq 'body') {
        ## |body| without a preceding |head|.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing',
                           text => 'head',
                           level => $self->{level}->{must});
        $element_state->{phase} = 'after body';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($phase eq 'after head') {
      my $html_child = $child_nsuri eq $HTML_NS ? $child_ln : '';
      if ($html_child eq 'body') {
        $element_state->{phase} = 'after body';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($phase eq 'after body') {
      ## Nothing is allowed after |body|.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    } else {
      die "check_child_element: Bad |html| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Required children that are still missing in each final phase.
    my %missing_for = (
      'after body' => [],
      'before head' => [qw/head body/],
      'after head' => [qw/body/],
    );
    my $missing = $missing_for{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";

    for my $child_name (@$missing) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => $child_name,
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1829 wakaba 1.25
## Definition of the |head| element.
##
## Content model: metadata content, with exactly one |title| child and
## no |style| child that has a |scoped| attribute.  While the children of
## |head| are processed, |$self->{flag}->{in_head}| is raised; the value
## it had before is put back in |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to report.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is allowed.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## A scoped |style| is not allowed in |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{level}->{must});
    }

    ## Raise the |in_head| flag for the children.  The previous value is
    ## saved only for the first child: saving it again for a later child
    ## would record the 1 written below for the preceding child, and
    ## |check_end| would then put back 1 instead of the value that was
    ## in force before |head|.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'title',
                         level => $self->{level}->{must});
    }

    ## Put the flag back only if |check_child_element| actually changed
    ## it; a |head| without child elements never touched the flag.
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1904    
## Definition of the |title| element: text-only content, no
## element-specific attribute checkers.
{
  ## Status of each attribute when used on |title|.
  my %title_attr_status = (
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{title} = {
    %HTMLTextChecker,
    status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%title_attr_status),
  };
}
1918 wakaba 1.1
## Definition of the |base| element: empty content.  |check_attrs|
## first performs the document-level checks (at most one |base|, ordering
## relative to URL attributes and hyperlinks, at least one of |href| and
## |target|) and then runs the ordinary attribute checks.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    ## Only one |base| element is allowed.
    if ($self->{has_base}) {
      $self->{onerror}->(node => $base_el,
                         type => 'element not allowed:base',
                         level => $self->{level}->{must});
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $self->{onerror}->(node => $base_el,
                         type => 'basehref after URL attribute',
                         level => $self->{level}->{must});
    }

    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $self->{onerror}->(node => $base_el,
                         type => 'basetarget after hyperlink',
                         level => $self->{level}->{must});
    }

    ## At least one of |href| and |target| is required.
    unless ($has_href or $has_target) {
      $self->{onerror}->(node => $base_el,
                         type => 'attribute missing:href|target',
                         level => $self->{level}->{must});
    }

    ## Mark |href| as a base URL in the element's URI information.
    $element_state->{uri_info}->{href}->{type}->{base} = 1;

    ## Build the ordinary attribute checker and run it right away.
    my $check_base_attrs = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    });
    return $check_base_attrs->($self, $item, $element_state);
  },
};
1983    
## Definition of the |link| element: empty content.  |check_attrs| runs
## the per-attribute checks and then the checks that involve more than
## one attribute (|href| and |rel| required, |sizes| only with the |icon|
## link type, |title| required for alternate style sheets).
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Set by the |sizes| checker below when the attribute is present.
    my $sizes_attr;

    my $check_link_attrs = $GetHTMLAttrsChecker->({
      charset => sub {
        ## Called as ($self, $attr, ...); the attribute value is passed
        ## first, followed by all of the original arguments.
        my $attr = $_[1];
        $HTMLCharsetChecker->($attr->value, @_);
      },
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      sizes => sub {
        my ($self, $attr) = @_;
        $sizes_attr = $attr;

        ## The value is a set of unique space-separated tokens, each of
        ## which is either |any| or WIDTHxHEIGHT (positive integers
        ## without leading zeros).
        my %seen;
        for my $token (grep {length $_}
                       split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
          if ($seen{$token}++) {
            $self->{onerror}->(node => $attr, type => 'duplicate token',
                               value => $token,
                               level => $self->{level}->{must});
            next;
          }
          next if $token eq 'any' or
              $token =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/;
          $self->{onerror}->(node => $attr,
                             type => 'sizes:syntax error',
                             value => $token,
                             level => $self->{level}->{must});
        }
      },
      target => $HTMLTargetAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
          ## NOTE: |charset| attribute had been part of HTML5 spec though
          ## it had been commented out.
      href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      methods => FEATURE_HTML20_RFC,
      rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      sizes => FEATURE_HTML5_LC,
      target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      # title: HTML5_WD | HTML5_LC | ...
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    });
    $check_link_attrs->($self, $item, $element_state);

    my $link_el = $item->{node};

    ## |href| is required; a link with a hyperlink link type is recorded
    ## so that a later |base target| can be detected.
    if ($link_el->has_attribute_ns (undef, 'href')) {
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    } else {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => 'href',
                         level => $self->{level}->{must});
    }

    ## |rel| is required.
    unless ($link_el->has_attribute_ns (undef, 'rel')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => 'rel',
                         level => $self->{level}->{must});
    }

    ## |sizes| is only allowed together with the |icon| link type.
    if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
      $self->{onerror}->(node => $sizes_attr,
                         type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }

    ## An alternate style sheet needs a non-empty |title|.
    if ($element_state->{link_rel}->{alternate} and
        $element_state->{link_rel}->{stylesheet}) {
      my $title_attr = $link_el->get_attribute_node_ns (undef, 'title');
      if (not $title_attr) {
        $self->{onerror}->(node => $link_el,
                           type => 'attribute missing',
                           text => 'title',
                           level => $self->{level}->{must});
      } elsif ($title_attr->value eq '') {
        $self->{onerror}->(node => $title_attr,
                           type => 'empty style sheet title',
                           level => $self->{level}->{must});
      }
    }
  },
};
2088    
2089     $Element->{$HTML_NS}->{meta} = {
2090 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2091 wakaba 1.40 %HTMLEmptyChecker,
2092     check_attrs => sub {
2093     my ($self, $item, $element_state) = @_;
2094 wakaba 1.1 my $name_attr;
2095     my $http_equiv_attr;
2096     my $charset_attr;
2097     my $content_attr;
2098 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2099 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2100     $attr_ns = '' unless defined $attr_ns;
2101     my $attr_ln = $attr->manakai_local_name;
2102     my $checker;
2103 wakaba 1.73 my $status;
2104 wakaba 1.1 if ($attr_ns eq '') {
2105 wakaba 1.73 $status = {
2106     %HTMLAttrStatus,
2107 wakaba 1.82 %XHTML2CommonAttrStatus,
2108 wakaba 1.153 charset => FEATURE_HTML5_WD,
2109     content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2110     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2111     'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2112     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
2113     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2114     name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2115 wakaba 1.73 scheme => FEATURE_M12N10_REC,
2116     }->{$attr_ln};
2117    
2118 wakaba 1.1 if ($attr_ln eq 'content') {
2119     $content_attr = $attr;
2120     $checker = 1;
2121     } elsif ($attr_ln eq 'name') {
2122     $name_attr = $attr;
2123     $checker = 1;
2124     } elsif ($attr_ln eq 'http-equiv') {
2125     $http_equiv_attr = $attr;
2126     $checker = 1;
2127     } elsif ($attr_ln eq 'charset') {
2128     $charset_attr = $attr;
2129     $checker = 1;
2130 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
2131 wakaba 1.71 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2132 wakaba 1.67 $checker = sub {};
2133 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2134     $attr_ln !~ /[A-Z]/) {
2135 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
2136     $status = $HTMLDatasetAttrStatus;
2137 wakaba 1.1 } else {
2138     $checker = $HTMLAttrChecker->{$attr_ln}
2139 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
2140 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
2141     }
2142     } else {
2143     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2144 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
2145     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2146     || $AttrStatus->{$attr_ns}->{''};
2147     $status = FEATURE_ALLOWED if not defined $status;
2148 wakaba 1.1 }
2149 wakaba 1.62
2150 wakaba 1.1 if ($checker) {
2151 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
2152 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
2153 wakaba 1.54 #
2154 wakaba 1.1 } else {
2155 wakaba 1.104 $self->{onerror}->(node => $attr,
2156     type => 'unknown attribute',
2157     level => $self->{level}->{uncertain});
2158 wakaba 1.49 ## ISSUE: No conformance createria for unknown attributes in the spec
2159     }
2160    
2161 wakaba 1.82 $self->_attr_status_info ($attr, $status);
2162 wakaba 1.1 }
2163    
2164     if (defined $name_attr) {
2165     if (defined $http_equiv_attr) {
2166     $self->{onerror}->(node => $http_equiv_attr,
2167 wakaba 1.104 type => 'attribute not allowed',
2168     level => $self->{level}->{must});
2169 wakaba 1.1 } elsif (defined $charset_attr) {
2170     $self->{onerror}->(node => $charset_attr,
2171 wakaba 1.104 type => 'attribute not allowed',
2172     level => $self->{level}->{must});
2173 wakaba 1.1 }
2174     my $metadata_name = $name_attr->value;
2175     my $metadata_value;
2176     if (defined $content_attr) {
2177     $metadata_value = $content_attr->value;
2178     } else {
2179 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2180 wakaba 1.104 type => 'attribute missing',
2181     text => 'content',
2182     level => $self->{level}->{must});
2183 wakaba 1.1 $metadata_value = '';
2184     }
2185     } elsif (defined $http_equiv_attr) {
2186     if (defined $charset_attr) {
2187     $self->{onerror}->(node => $charset_attr,
2188 wakaba 1.104 type => 'attribute not allowed',
2189     level => $self->{level}->{must});
2190 wakaba 1.1 }
2191     unless (defined $content_attr) {
2192 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2193 wakaba 1.104 type => 'attribute missing',
2194     text => 'content',
2195     level => $self->{level}->{must});
2196 wakaba 1.1 }
2197     } elsif (defined $charset_attr) {
2198     if (defined $content_attr) {
2199     $self->{onerror}->(node => $content_attr,
2200 wakaba 1.104 type => 'attribute not allowed',
2201     level => $self->{level}->{must});
2202 wakaba 1.1 }
2203     } else {
2204     if (defined $content_attr) {
2205     $self->{onerror}->(node => $content_attr,
2206 wakaba 1.104 type => 'attribute not allowed',
2207     level => $self->{level}->{must});
2208 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2209 wakaba 1.104 type => 'attribute missing:name|http-equiv',
2210     level => $self->{level}->{must});
2211 wakaba 1.1 } else {
2212 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2213 wakaba 1.104 type => 'attribute missing:name|http-equiv|charset',
2214     level => $self->{level}->{must});
2215 wakaba 1.1 }
2216     }
2217    
2218 wakaba 1.32 my $check_charset_decl = sub () {
2219 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
2220 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2221     for my $el (@{$parent->child_nodes}) {
2222     next unless $el->node_type == 1; # ELEMENT_NODE
2223 wakaba 1.40 unless ($el eq $item->{node}) {
2224 wakaba 1.29 ## NOTE: Not the first child element.
2225 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2226 wakaba 1.32 type => 'element not allowed:meta charset',
2227 wakaba 1.104 level => $self->{level}->{must});
2228 wakaba 1.29 }
2229     last;
2230     ## NOTE: Entity references are not supported.
2231     }
2232     } else {
2233 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2234 wakaba 1.32 type => 'element not allowed:meta charset',
2235 wakaba 1.104 level => $self->{level}->{must});
2236 wakaba 1.29 }
2237    
2238 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
2239     $self->{onerror}->(node => $item->{node},
2240 wakaba 1.32 type => 'in XML:charset',
2241 wakaba 1.104 level => $self->{level}->{must});
2242 wakaba 1.1 }
2243 wakaba 1.32 }; # $check_charset_decl
2244 wakaba 1.21
2245 wakaba 1.32 my $check_charset = sub ($$) {
2246     my ($attr, $charset_value) = @_;
2247 wakaba 1.21
2248 wakaba 1.91 my $charset;
2249     ($charset, $charset_value)
2250     = $HTMLCharsetChecker->($charset_value, $self, $attr);
2251    
2252 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
2253 wakaba 1.21 if (defined $ic) {
2254     ## TODO: Test for this case
2255     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2256     if ($charset ne $ic_charset) {
2257 wakaba 1.32 $self->{onerror}->(node => $attr,
2258 wakaba 1.104 type => 'mismatched charset name',
2259 wakaba 1.106 text => $ic,
2260 wakaba 1.104 value => $charset_value,
2261     level => $self->{level}->{must});
2262 wakaba 1.21 }
2263     } else {
2264     ## NOTE: MUST, but not checkable, since the document is not originally
2265     ## in serialized form (or the parser does not preserve the input
2266     ## encoding information).
2267 wakaba 1.32 $self->{onerror}->(node => $attr,
2268 wakaba 1.104 type => 'mismatched charset name not checked',
2269     value => $charset_value,
2270     level => $self->{level}->{uncertain});
2271 wakaba 1.21 }
2272    
2273 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
2274     $self->{onerror}->(node => $attr,
2275 wakaba 1.104 type => 'charref in charset',
2276     level => $self->{level}->{must},
2277     layer => 'syntax');
2278 wakaba 1.22 }
2279 wakaba 1.32 }; # $check_charset
2280    
2281     ## TODO: metadata conformance
2282    
2283     ## TODO: pragma conformance
2284     if (defined $http_equiv_attr) { ## An enumerated attribute
2285     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
2286 wakaba 1.33
2287 wakaba 1.85 if ($self->{has_http_equiv}->{$keyword}) {
2288     $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2289     node => $http_equiv_attr,
2290 wakaba 1.104 level => $self->{level}->{must});
2291 wakaba 1.85 } else {
2292     $self->{has_http_equiv}->{$keyword} = 1;
2293     }
2294    
2295     if ($keyword eq 'content-type') {
2296 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2297 wakaba 1.33
2298 wakaba 1.32 $check_charset_decl->();
2299     if ($content_attr) {
2300     my $content = $content_attr->value;
2301 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2302 wakaba 1.132 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2303 wakaba 1.58 =(.+)\z!sx) {
2304 wakaba 1.32 $check_charset->($content_attr, $1);
2305     } else {
2306     $self->{onerror}->(node => $content_attr,
2307     type => 'meta content-type syntax error',
2308 wakaba 1.104 level => $self->{level}->{must});
2309 wakaba 1.85 }
2310     }
2311     } elsif ($keyword eq 'default-style') {
2312     ## ISSUE: Not defined yet in the spec.
2313     } elsif ($keyword eq 'refresh') {
2314     if ($content_attr) {
2315     my $content = $content_attr->value;
2316     if ($content =~ /\A[0-9]+\z/) {
2317     ## NOTE: Valid non-negative integer.
2318     #
2319 wakaba 1.132 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2320 wakaba 1.85 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
2321     Whatpm::URIChecker->check_iri_reference ($content, sub {
2322 wakaba 1.104 $self->{onerror}->(value => $content, @_, node => $content_attr);
2323 wakaba 1.106 }, $self->{level});
2324 wakaba 1.85 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URIs".
2325    
2326     $element_state->{uri_info}->{content}->{node} = $content_attr;
2327     $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2328     ## TODO: absolute
2329     push @{$self->{return}->{uri}->{$content} ||= []},
2330     $element_state->{uri_info}->{content};
2331     } else {
2332     $self->{onerror}->(node => $content_attr,
2333     type => 'refresh:syntax error',
2334 wakaba 1.104 level => $self->{level}->{must});
2335 wakaba 1.32 }
2336     }
2337     } else {
2338     $self->{onerror}->(node => $http_equiv_attr,
2339 wakaba 1.104 type => 'enumerated:invalid',
2340     level => $self->{level}->{must});
2341 wakaba 1.32 }
2342     }
2343    
2344     if (defined $charset_attr) {
2345     $check_charset_decl->();
2346     $check_charset->($charset_attr, $charset_attr->value);
2347 wakaba 1.1 }
2348     },
2349     };
2350    
## |style| element definition.
##
## Handlers not listed here come from %HTMLChecker, which is spread after
## |status| (so a |status| key in %HTMLChecker, if any, would take
## precedence).  The handlers below collect the element's text content
## and hand it to the |onsubdoc| callback together with the normalized
## |type| value.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
      media => $HTMLMQAttrChecker,
      scoped => $GetHTMLBooleanAttrChecker->('scoped'),
      ## NOTE: |title| has special semantics on |style|, but its syntax
      ## is the same as anywhere else.
    },
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
           qw/dir media title type/),
      disabled => FEATURE_XHTML2_ED,
      href => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
      id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      scoped => FEATURE_HTML5_FD,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on its
    ## content model; what is allowed depends on the |type| attribute.
    my $raw_type = $item->{node}->get_attribute_ns (undef, 'type');
    $raw_type = 'text/css' if not defined $raw_type;

    ## Normalize "type/subtype" (optional surrounding white space, no
    ## parameters).  Anything else leaves the type undefined.
    ## NOTE: We don't know how parameters are handled by UAs.  According
    ## to the HTML5 specification, <style> with unknown parameters in
    ## |type=""| must be ignored.
    my $style_type;
    if ($raw_type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      ($style_type = "$1/$2") =~ tr/A-Z/a-z/; ## ASCII case-insensitive
    }

    ## Child elements are rejected only for |text/css|; an invalid or
    ## unknown type allows them.
    ## NOTE: There is no definition for "XML-based styling language" in
    ## HTML5, so such types get no special treatment here.
    $element_state->{allow_element}
        = (defined $style_type and $style_type eq 'text/css') ? 0 : 1;
    $element_state->{style_type} = $style_type;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;

    ## Accumulator for the text children (filled by |check_child_text|).
    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it in one place.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
             $element_state->{allow_element}) {
      ## Allowed.
    } else {
      $error_type = 'element not allowed';
    }

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $style_type = $element_state->{style_type};
    if (not defined $style_type) {
      ## NOTE: Invalid type="" - nothing to check.
    } elsif ($style_type =~ m![+/][Xx][Mm][Ll]\z!) {
      ## NOTE: XML content should be checked by THIS instance of the
      ## checker as part of normal tree validation.  However, we don't
      ## know of any XML-based styling language that can be used in an
      ## HTML <style> element, so a "style language not supported"
      ## error is reported instead.
      $self->{onerror}->(node => $item->{node},
                         type => 'XML style lang',
                         text => $style_type,
                         level => $self->{level}->{uncertain});
    } else {
      ## |text/css| and every other non-XML type: pass the collected
      ## text on as a subdocument of that media type.
      ## NOTE: Should some kind of error be raised for an unknown type?
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => $style_type,
                           is_char_string => 1});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
2456 wakaba 1.25 ## ISSUE: Relationship to significant content check?
2457 wakaba 1.1
2458     $Element->{$HTML_NS}->{body} = {
2459 wakaba 1.72 %HTMLFlowContentChecker,
2460 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2461 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2462     alink => $HTMLColorAttrChecker,
2463     background => $HTMLURIAttrChecker,
2464     bgcolor => $HTMLColorAttrChecker,
2465     link => $HTMLColorAttrChecker,
2466     text => $HTMLColorAttrChecker,
2467     vlink => $HTMLColorAttrChecker,
2468     }, {
2469 wakaba 1.49 %HTMLAttrStatus,
2470 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2471 wakaba 1.49 alink => FEATURE_M12N10_REC_DEPRECATED,
2472     background => FEATURE_M12N10_REC_DEPRECATED,
2473     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
2474 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2475 wakaba 1.49 link => FEATURE_M12N10_REC_DEPRECATED,
2476 wakaba 1.50 onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2477     onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2478 wakaba 1.49 text => FEATURE_M12N10_REC_DEPRECATED,
2479     vlink => FEATURE_M12N10_REC_DEPRECATED,
2480     }),
2481 wakaba 1.68 check_start => sub {
2482     my ($self, $item, $element_state) = @_;
2483    
2484     $element_state->{uri_info}->{background}->{type}->{embedded} = 1;
2485 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2486     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2487 wakaba 1.68 },
2488 wakaba 1.1 };
2489    
## |section| element definition: %HTMLFlowContentChecker handlers, no
## element-specific attribute checkers, common attribute status only.
$Element->{$HTML_NS}->{section} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {%HTMLAttrStatus, %XHTML2CommonAttrStatus},
  ),
};
2499    
## |nav| element definition.
## NOTE: %HTMLFlowContentChecker is spread after |status|, so a |status|
## key in it (if any) would take precedence over the one given here.
$Element->{$HTML_NS}->{nav}
    = {status => FEATURE_HTML5_LC, %HTMLFlowContentChecker};
2504    
## |article| element definition.
## NOTE: %HTMLFlowContentChecker is spread after |status|, so a |status|
## key in it (if any) would take precedence over the one given here.
$Element->{$HTML_NS}->{article}
    = {status => FEATURE_HTML5_LC, %HTMLFlowContentChecker};
2509    
## |blockquote| element definition.  Handlers not overridden here come
## from %HTMLFlowContentChecker (spread after |status|).
$Element->{$HTML_NS}->{blockquote} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLFlowContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      cite => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Classify the URI-valued attributes of this element.
    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
};
2531    
## |aside| element definition.
## NOTE: %HTMLFlowContentChecker is spread after |status|, so a |status|
## key in it (if any) would take precedence over the one given here.
$Element->{$HTML_NS}->{aside}
    = {status => FEATURE_HTML5_LC, %HTMLFlowContentChecker};
2536    
## |h1| element definition (shallow-copied for |h2|..|h6| further down
## in this file).  Handlers not overridden here come from
## %HTMLPhrasingContentChecker.
$Element->{$HTML_NS}->{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->(
        +{map { ($_ => 1) } qw/left center right justify/},
      ),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record that a heading has been seen; |header|'s |check_end|
    ## reads this flag.
    $self->{flag}->{has_hn} = 1;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
};
2559    
## |h2|..|h6| each get their own shallow copy of the |h1| definition.
for my $heading_name (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}->{$heading_name} = {%{$Element->{$HTML_NS}->{h1}}};
}
2569 wakaba 1.1
2570 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
2571    
## |header| element definition.  Handlers not overridden here come from
## %HTMLFlowContentChecker (spread after |status|).
$Element->{$HTML_NS}->{header} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |header|, |footer| and $HTMLSectioningContent are registered as
    ## "minus" elements until |check_end|.
    $self->_add_minus_elements
        ($element_state,
         {$HTML_NS => {header => 1, footer => 1}},
         $HTMLSectioningContent);

    ## Save the outer "heading seen" flag and start counting afresh for
    ## the content of this |header|.
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## No heading was flagged while processing this |header|.
    if (not $self->{flag}->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn',
                         level => $self->{level}->{must});
    }

    ## Merge the saved outer flag back in.
    $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
2600    
## |footer| element definition.  Handlers not overridden here come from
## %HTMLFlowContentChecker (spread after |status|).
$Element->{$HTML_NS}->{footer} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |footer|, $HTMLSectioningContent and $HTMLHeadingContent are
    ## registered as "minus" elements until |check_end|.
    $self->_add_minus_elements
        ($element_state,
         {$HTML_NS => {footer => 1}},
         $HTMLSectioningContent,
         $HTMLHeadingContent);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2621    
## |address| element definition.  Handlers not overridden here come from
## %HTMLFlowContentChecker.
$Element->{$HTML_NS}->{address} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: add test
      #align => $GetHTMLEnumeratedAttrChecker->({
      #  left => 1, center => 1, right => 1, justify => 1,
      #}),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      (map { ($_ => FEATURE_HTML20_RFC) } qw/sdaform sdapref/),
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |footer|, |address|, $HTMLSectioningContent and
    ## $HTMLHeadingContent are registered as "minus" elements until
    ## |check_end|.
    $self->_add_minus_elements
        ($element_state,
         {$HTML_NS => {footer => 1, address => 1}},
         $HTMLSectioningContent,
         $HTMLHeadingContent);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2654    
## |p| element definition.  All handlers come from
## %HTMLPhrasingContentChecker; only |status| and the attribute checker
## are element-specific.
$Element->{$HTML_NS}->{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->(
        +{map { ($_ => 1) } qw/left center right justify/},
      ),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2670    
## |hr| element definition.  Handlers come from %HTMLEmptyChecker.
$Element->{$HTML_NS}->{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |align|, |noshade|, |size|, |width|
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
           qw/align noshade size width/),
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
};
2687    
## |br| element definition.  Handlers come from %HTMLEmptyChecker.
## NOTE: A blank line MUST NOT be used for presentation purposes.  That
## requirement is semantic, so it cannot be checked here.
$Element->{$HTML_NS}->{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      clear => $GetHTMLEnumeratedAttrChecker->(
        +{map { ($_ => 1) } qw/left all right none/},
      ),
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_WD | FEATURE_M12N10_REC) }
           qw/class id title/),
      clear => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
2707    
## |dialog| element definition.
##
## Children must alternate |dt|, |dd|, |dt|, |dd|, ...  The progress is
## tracked in |$element_state->{phase}|, which is either 'before dt' or
## 'before dd'.  Handlers not overridden here come from %HTMLChecker
## (spread after |status|).
$Element->{$HTML_NS}->{dialog} = {
  status => FEATURE_HTML5_WD,
  %HTMLChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## phase => [expected child, the other member of the pair,
    ##           phase entered once the expected child is seen]
    my $rule = {
      'before dt' => ['dt', 'dd', 'before dd'],
      'before dd' => ['dd', 'dt', 'before dt'],
    }->{$element_state->{phase}}
        or die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
    my ($expected_ln, $other_ln, $next_phase) = @$rule;

    my $is_html = $child_nsuri eq $HTML_NS;
    if ($is_html and $child_ln eq $expected_ln) {
      $element_state->{phase} = $next_phase;
    } elsif ($is_html and $child_ln eq $other_ln) {
      ## The other member of the pair arrived first: report the missing
      ## one.  The phase stays as it is.
      $self->{onerror}->(node => $child_el,
                         type => 'ps element missing',
                         text => $expected_ln,
                         level => $self->{level}->{must});
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must});
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Ending while still waiting for a |dd| means the last |dt| has no
    ## partner.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'dd',
                       level => $self->{level}->{must})
        if $element_state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  },
};
2777    
## |pre| element definition.
##
## Handlers not overridden here come from %HTMLPhrasingContentChecker.
## In addition, when the element's |class| attribute contains the token
## |idl| or |idl-code|, |check_end| passes the element's text content to
## the |onsubdoc| callback as a |text/x-webidl| subdocument.
$Element->{$HTML_NS}->{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## TODO: Flag to enable/disable IDL checking?

    ## The |class| attribute is a set of space-separated tokens, so it
    ## is split on HTML space characters and each token is compared
    ## exactly.  A word-boundary regexp (/\bidl\b/) would also accept
    ## unrelated class names such as |not-idl| or |idl-foo|, because "-"
    ## counts as a word boundary.  The attribute may be absent, in which
    ## case there is nothing to match against.
    my $class = $item->{node}->get_attribute_ns (undef, 'class');
    my $is_idl = 0;
    if (defined $class) {
      for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $class) {
        if ($token eq 'idl' or $token eq 'idl-code') {
          $is_idl = 1;
          last;
        }
      }
    }

    if ($is_idl) {
      ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
      ## NOTE: pre.code > code.idl-code: WebIDL spec
      ## NOTE: pre.idl-code: DOM1 spec
      ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
      ## NOTE: pre.schema: ReSpec-generated specs
      ## (Only the |pre.idl| and |pre.idl-code| forms are detected by
      ## the check above.)
      $self->{onsubdoc}->({s => $item->{node}->text_content,
                           container_node => $item->{node},
                           media_type => 'text/x-webidl',
                           is_char_string => 1});
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2810    
## |ol| element definition (also the base of |ul| further down in this
## file).  Only |li| children are accepted, apart from elements listed
## in |$self->{plus_elements}|; significant text is rejected.  Handlers
## not overridden here come from %HTMLChecker.
$Element->{$HTML_NS}->{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      (map { ($_ => $GetHTMLBooleanAttrChecker->($_)) }
           qw/compact reversed/),
      start => $HTMLIntegerAttrChecker,
      ## TODO: HTML4 |type|
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      reversed => FEATURE_HTML5_WD,
      sdaform => FEATURE_HTML20_RFC,
      #start => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
      start => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error (if any) applies, then report it in one place.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
             ($child_nsuri eq $HTML_NS and $child_ln eq 'li')) {
      ## Allowed.
    } else {
      $error_type = 'element not allowed';
    }

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
2856    
## |ul| element definition: a shallow copy of the |ol| definition with
## its own |status| and attribute checker.
$Element->{$HTML_NS}->{ul} = {
  %{ $Element->{$HTML_NS}->{ol} },
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
      ## TODO: HTML4 |type|
      ## TODO: sdaform, align
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) } qw/compact type/),
    },
  ),
};
2874    
## |dir| element definition: a shallow copy of the |ul| definition with
## a deprecated |status| and its own attribute checker.
## TODO: %block; is not allowed [HTML4]
## TODO: Empty list allowed?
$Element->{$HTML_NS}->{dir} = {
  %{ $Element->{$HTML_NS}->{ul} },
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {compact => $GetHTMLBooleanAttrChecker->('compact')},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      (map { ($_ => FEATURE_HTML20_RFC) } qw/sdaform sdapref/),
    },
  ),
};
2891    
## |li| element definition.
##
## The child handlers delegate to %HTMLPhrasingContentChecker while
## |$self->{flag}->{in_menu}| is set and to %HTMLFlowContentChecker
## otherwise.  Handlers not overridden here come from
## %HTMLFlowContentChecker.
$Element->{$HTML_NS}->{li} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |type|
      value => sub {
        my ($self, $attr) = @_;

        ## Is the |li| a child of an HTML |ol| element?
        my $is_ol_item = 0;
        my $list_el = $attr->owner_element->manakai_parent_element;
        if (defined $list_el) {
          my $list_ns = $list_el->namespace_uri;
          my $list_ln = $list_el->manakai_local_name;
          $is_ol_item = 1
              if defined $list_ns and
                 $list_ns eq $HTML_NS and
                 $list_ln eq 'ol';
        }

        if (not $is_ol_item) {
          ## ISSUE: No "MUST" in the spec.
          $self->{onerror}->(node => $attr,
                             type => 'non-ol li value',
                             level => $self->{level}->{html5_fact});
        }

        ## The value itself is always checked as an integer.
        $HTMLIntegerAttrChecker->($self, $attr);
      },
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      type => FEATURE_M12N10_REC_DEPRECATED,
      #value => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
      #    FEATURE_M12N10_REC_DEPRECATED,
      value => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
          FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($self) = @_;
    ## TODO: In <dir> element, then ...
    my $base = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
    $base->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self) = @_;
    ## TODO: In <dir> element, then ...
    my $base = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
    $base->{check_child_text}->(@_);
  },
};
2950    
## |dl|: children are expected to come in groups of one or more |dt|
## followed by one or more |dd|.  The position inside a group is tracked
## in |$element_state->{phase}|, which is one of 'before dt', 'in dts'
## or 'in dds'.
$Element->{$HTML_NS}{dl} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
      type    => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),

  ## Start in the 'before dt' phase and flag |template| / |ref| as
  ## resource URIs.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';

    for my $uri_attr (qw/template ref/) {
      $element_state->{uri_info}->{$uri_attr}->{type}->{resource} = 1;
    }
  },

  ## Validates one child element and advances the phase.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to check.
    } else {
      my $phase = $element_state->{phase};
      die "check_child_element: Bad |dl| phase: $element_state->{phase}"
          unless grep { $phase eq $_ } 'in dds', 'in dts', 'before dt';

      ## Only HTML-namespace |dt| and |dd| are meaningful; anything else
      ## is reported and leaves the phase untouched.
      my $kind = $child_nsuri eq $HTML_NS ? $child_ln : '';
      if ($kind eq 'dt') {
        $element_state->{phase} = 'in dts';
      } elsif ($kind eq 'dd') {
        if ($phase eq 'before dt') {
          ## A |dd| with no preceding |dt|.
          $self->{onerror}->(node => $child_el,
                             type => 'ps element missing',
                             text => 'dt',
                             level => $self->{level}->{must});
        }
        $element_state->{phase} = 'in dds';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },

  ## Significant text directly inside |dl| is an error.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },

  ## Ending while still 'in dts' means the last |dt| group has no |dd|.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'dd',
                       level => $self->{level}->{must})
        if $element_state->{phase} eq 'in dts';

    $HTMLChecker{check_end}->(@_);
  },
};
3036    
## |dt|: starts from the %HTMLPhrasingContentChecker entries and overrides
## |status| and |check_attrs|.  No element-specific attribute checkers
## (first table is empty); the second table gives per-attribute status.
$Element->{$HTML_NS}{dt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3047    
## |dd|: starts from the %HTMLFlowContentChecker entries and overrides
## |status| and |check_attrs|.  No element-specific attribute checkers.
$Element->{$HTML_NS}{dd} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3058    
## |a|: transparent content model with a hand-written attribute checker.
## Hyperlink-only attributes (|target|, |ping|, |rel|, |media|,
## |hreflang|, |type|) are reported when |href| is absent.
$Element->{$HTML_NS}->{a} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,

  ## Checks every attribute of the element, then records whether the
  ## element is a hyperlink (has |href|).
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Lookup tables for null-namespace attributes.  They do not depend
    ## on the attribute being examined, so they are built once per call
    ## instead of once per attribute.  They have to stay inside this sub
    ## because the |rel| checker closes over |$item|.
    my $local_status = {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_WD,
      rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    };
    my $local_checker = {
      accesskey => $HTMLAccesskeyAttrChecker,
      charset => sub {
        my ($self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      ## TODO: HTML4 |coords|
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      ## TODO: HTML4 |shape|
      media => $HTMLMQAttrChecker,
      ## TODO: HTML4/XHTML1 |name|
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    ## Attributes that have an |a|-specific checker, by local name.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $status = $local_status->{$attr_ln};
        $checker = $local_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      ## Fall back to the generic per-namespace tables.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## No checker and no status: nothing is reported here.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    ## Remember the outer |in_a_href| flag so that |check_end| can tell
    ## whether it is allowed to clear it.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },

  ## Interactive content is registered as excluded for the duration of
  ## the element (undone in |check_end|).
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },

  ## Undoes |check_start| and restores the |in_a_href| flag.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLTransparentChecker{check_end}->(@_);
  },
};
3182    
## |q|: phrasing content with a URI-valued |cite| attribute.
## NOTE: |status| is listed before the %HTMLPhrasingContentChecker
## entries, so a |status| key in that hash (if there is one) would take
## precedence; the original ordering is kept.
$Element->{$HTML_NS}{'q'} = {
  status => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      cite    => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED
                 | FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    },
  ),
  ## Classify the URI attributes: |cite| is a citation; |template| and
  ## |ref| are resources.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $uri_info = $element_state->{uri_info} ||= {};

    $uri_info->{cite}->{type}->{cite} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
3204 wakaba 1.75 ## TODO: "Quotation punctuation (such as quotation marks), if any, must be
3205     ## placed inside the <code>q</code> element." Though we cannot test the
3206     ## element against this requirement since it includes a semantic bit,
3207     ## it might be possible to inform of the existence of quotation marks OUTSIDE
3208     ## the |q| element.
3209 wakaba 1.1
## |cite|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers.
$Element->{$HTML_NS}{cite} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3220    
## |em|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers.
$Element->{$HTML_NS}{em} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3231    
## |strong|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers.
$Element->{$HTML_NS}{strong} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3242    
## |small|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers.
$Element->{$HTML_NS}{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3252    
## |big|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  Its |status| carries no HTML5
## flag, only FEATURE_M12N10_REC.
$Element->{$HTML_NS}{big} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3262    
## |mark|: uses the %HTMLPhrasingContentChecker entries unchanged.
## NOTE: |status| precedes the spread, so a |status| key in
## %HTMLPhrasingContentChecker (if there is one) would take precedence;
## the original ordering is kept.
$Element->{$HTML_NS}{mark} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
};
3267    
## |dfn|: phrasing content that must not contain another |dfn|.  Every
## |dfn| is registered in |$self->{term}| under its defining term.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),

  ## Excludes nested |dfn| and records the defining term, which is, in
  ## order of preference:
  ##   1. the |title| attribute of the |dfn|;
  ##   2. the |title| of its |abbr| child, if that HTML |abbr| is the
  ##      ONLY element child and all text children are inter-element
  ##      whitespace;
  ##   3. the |text_content| of the |dfn|.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      ## Count element children explicitly.  Bailing out only when a
      ## term had already been found let a titled |abbr| that FOLLOWS
      ## another element (or an untitled |abbr|) be picked up, although
      ## it is not the sole element child.
      my $element_count = 0;
      CHILD: for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          if (++$element_count > 1) {
            undef $term;
            last CHILD;
          }
          next CHILD unless $child->manakai_local_name eq 'abbr';
          my $nsuri = $child->namespace_uri;
          next CHILD unless defined $nsuri and $nsuri eq $HTML_NS;
          my $attr = $child->get_attribute_node_ns (undef, 'title');
          $term = $attr->value if $attr;
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next CHILD
              if $child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/; # Inter-element whitespace
          undef $term;
          last CHILD;
        }
      }
      $term = $node->text_content unless defined $term;
    }
    push @{$self->{term}->{$term} ||= []}, $node;
    ## ISSUE: The HTML5 definition for the defined term does not work with
    ## |ruby| unless |dfn| has |title|.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },

  ## Lifts the nested-|dfn| exclusion again.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3328    
## |abbr|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers.
$Element->{$HTML_NS}{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      full => FEATURE_XHTML2_ED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  ## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
  ## number (plural vs singular) must match the grammatical number of the
  ## contents of the element."  Though this can be checked by machine,
  ## it requires language-specific knowledge and a dictionary, so this
  ## requirement is not checked.
  ## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
};
3345    
## |acronym|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  Its |status| carries no HTML5
## flag, only FEATURE_M12N10_REC.
$Element->{$HTML_NS}{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3355    
## |time|: the date/time string is validated in |check_end|.  It is taken
## from the |datetime| attribute when present, otherwise from the
## element's text content.
## NOTE: |status| precedes the %HTMLPhrasingContentChecker spread; the
## original ordering is kept.
$Element->{$HTML_NS}{'time'} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # checked in |checker|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_FD,
  }),
  ## TODO: Update definition
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Pick the input string, the whitespace pattern that goes with it
    ## and the node that errors are reported against.
    my $el = $item->{node};
    my $attr = $el->get_attribute_node_ns (undef, 'datetime');
    my ($input, $reg_sp, $input_node);
    if ($attr) {
      ($input, $input_node) = ($attr->value, $attr);
      $reg_sp = qr/[\x09\x0A\x0C\x0D\x20]*/;
    } else {
      ($input, $input_node) = ($el->text_content, $el);
      $reg_sp = qr/\p{WhiteSpace}*/;

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    ## All errors here are must-level and attached to |$input_node|.
    my $report = sub {
      $self->{onerror}->(node => $input_node,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    my $hour;
    my $minute;
    my $second;
    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      ## |$lead| is the year in the date-and-time form and the hour in
      ## the time-only form.
      my ($lead, $month, $day, $dt_hour, $dt_minute, $dt_second,
          $tz_hour, $tz_minute, $t_minute, $t_second)
          = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);

      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        $report->('dateortime:syntax error')
            if length ($lead) != 4 or length ($month) != 2 or
               length ($day) != 2 or
               length ($dt_hour) != 2 or length ($dt_minute) != 2;

        if (1 <= $month and $month <= 12) {
          ## Index 0 is a placeholder so that months index directly.
          my $last_day
              = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$month];
          $report->('datetime:bad day')
              if $day < 1 or $day > $last_day;
          ## February 29 is only valid in leap years.
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and
                 not ($lead % 400 == 0 or
                      ($lead % 4 == 0 and $lead % 100 != 0));
        } else {
          $report->('datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          $report->('dateortime:syntax error')
              if length ($tz_hour) != 2 or length ($tz_minute) != 2;
          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        $report->('dateortime:syntax error')
            if length ($lead) != 2 or length ($t_minute) != 2;

        ($hour, $minute, $second) = ($lead, $t_minute, $t_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: The integer part of the second does not have to be two
        ## digits long.

        ## A second that starts with "." has no integer part.
        $report->('dateortime:syntax error')
            if substr ($second, 0, 1) eq '.';

        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3504    
## |meter|: six floating-point attributes, each accepted whatever its
## value (the range callback always returns true).
## TODO: "The recommended way of giving the value is to include it as contents of the element"
## TODO: value inequalities (HTML5 revision 1463)
## TODO: content checking
## TODO: content or value must contain number (rev 2053)
## NOTE: |status| precedes the %HTMLPhrasingContentChecker spread; the
## original ordering is kept.
$Element->{$HTML_NS}{meter} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      (map { ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 })) }
           qw/value min low high max optimum/),
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_DEFAULT) }
           qw/high low max min optimum value/),
    },
  ),
};
3528    
## |progress|: |value| must be a floating-point number >= 0 and |max| a
## floating-point number > 0.
## TODO: recommended to use content
## NOTE: |status| precedes the %HTMLPhrasingContentChecker spread; the
## original ordering is kept.
$Element->{$HTML_NS}{progress} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      value => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] >= 0 }),
      max   => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      max   => FEATURE_HTML5_DEFAULT,
      value => FEATURE_HTML5_DEFAULT,
    },
  ),
};
3541    
## |code|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers.
$Element->{$HTML_NS}{code} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3552    
## |var|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers.
$Element->{$HTML_NS}{var} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3563    
## |samp|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers.
$Element->{$HTML_NS}{samp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3574    
## |kbd|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers.
$Element->{$HTML_NS}{kbd} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3585    
## |sub|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers.
$Element->{$HTML_NS}{'sub'} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
    },
  ),
};

## |sup| shares the very same definition hash as |sub| (the reference is
## copied, not the hash).
$Element->{$HTML_NS}{sup} = $Element->{$HTML_NS}{'sub'};
3598 wakaba 1.1
## |span|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  No element-specific attribute
## checkers; |datafld|, |dataformatas| and |datasrc| only get a status.
$Element->{$HTML_NS}{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
           qw/datafld dataformatas datasrc/),
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML2X_RFC,
    },
  ),
};
3612    
## |i|: starts from the %HTMLPhrasingContentChecker entries and overrides
## |status| and |check_attrs|.  No element-specific attribute checkers.
$Element->{$HTML_NS}{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};

## |b| shares the very same definition hash as |i| (the reference is
## copied, not the hash).
$Element->{$HTML_NS}{b} = $Element->{$HTML_NS}{i};
3625    
## |tt|: starts from the %HTMLPhrasingContentChecker entries and
## overrides |status| and |check_attrs|.  Its |status| carries no HTML5
## flag, only FEATURE_M12N10_REC.
$Element->{$HTML_NS}{tt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang    => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3636 wakaba 1.51
## |s|: starts from the %HTMLPhrasingContentChecker entries and overrides
## |status| (FEATURE_M12N10_REC_DEPRECATED) and |check_attrs|.
$Element->{$HTML_NS}{'s'} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};

## |strike| and |u| share the very same definition hash as |s| (the
## reference is copied, not the hash).
$Element->{$HTML_NS}{strike} = $Element->{$HTML_NS}{'s'};

$Element->{$HTML_NS}{u} = $Element->{$HTML_NS}{'s'};
3650    
## |bdo|: phrasing content; the |dir| attribute is mandatory.
$Element->{$HTML_NS}{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,

  ## Runs the generic attribute checks, then reports a missing |dir|.
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_generic = $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    });
    $check_generic->($self, $item, $element_state);

    $self->{onerror}->(node => $item->{node},
                       type => 'attribute missing',
                       text => 'dir',
                       level => $self->{level}->{must})
        unless $item->{node}->has_attribute_ns (undef, 'dir');
  },
  ## ISSUE: The spec does not directly say that |dir| is an enumerated
  ## attribute.
};
3676    
3677 wakaba 1.99 $Element->{$HTML_NS}->{ruby} = {
3678     %HTMLPhrasingContentChecker,
3679     status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
3680     check_attrs => $GetHTMLAttrsChecker->({}, {
3681     %HTMLAttrStatus,
3682     %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
3683 wakaba 1.153 lang => FEATURE_HTML5_WD,
3684 wakaba 1.99 }),
3685     check_start => sub {
3686     my ($self, $item, $element_state) = @_;
3687    
3688     $element_state->{phase} = 'before-rb';
3689     #$element_state->{has_sig}
3690 wakaba 1.100
3691     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3692     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3693 wakaba 1.99 },
3694     ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
3695     check_child_element => sub {
3696     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3697     $child_is_transparent, $element_state) = @_;
3698 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
3699     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
3700 wakaba 1.99 $self->{onerror}->(node => $child_el,
3701     type => 'element not allowed:minus',
3702 wakaba 1.104 level => $self->{level}->{must});
3703 wakaba 1.99 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3704     #
3705     } elsif ($element_state->{phase} eq 'before-rb') {
3706     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3707     $element_state->{phase} = 'in-rb';
3708     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3709     $self->{onerror}->(node => $child_el,
3710 wakaba 1.104 level => $self->{level}->{should},
3711     type => 'no significant content before');
3712 wakaba 1.99 $element_state->{phase} = 'after-rt';
3713     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3714     $self->{onerror}->(node => $child_el,
3715 wakaba 1.104 level => $self->{level}->{should},
3716     type => 'no significant content before');
3717 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3718     } else {
3719     $self->{onerror}->(node => $child_el,
3720 wakaba 1.104 type => 'element not allowed:ruby base',
3721     level => $self->{level}->{must});
3722 wakaba 1.99 $element_state->{phase} = 'in-rb';
3723     }
3724     } elsif ($element_state->{phase} eq 'in-rb') {
3725     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3726     #$element_state->{phase} = 'in-rb';
3727     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3728     unless ($element_state->{has_significant}) {
3729     $self->{onerror}->(node => $child_el,
3730 wakaba 1.104 level => $self->{level}->{should},
3731     type => 'no significant content before');
3732 wakaba 1.99 }
3733     $element_state->{phase} = 'after-rt';
3734     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3735     unless ($element_state->{has_significant}) {
3736     $self->{onerror}->(node => $child_el,
3737 wakaba 1.104 level => $self->{level}->{should},
3738     type => 'no significant content before');
3739 wakaba 1.99 }
3740     $element_state->{phase} = 'after-rp1';
3741     } else {
3742     $self->{onerror}->(node => $child_el,
3743 wakaba 1.104 type => 'element not allowed:ruby base',
3744     level => $self->{level}->{must});
3745 wakaba 1.99 #$element_state->{phase} = 'in-rb';
3746     }
3747     } elsif ($element_state->{phase} eq 'after-rt') {
3748     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3749     if ($element_state->{has_significant}) {
3750     $element_state->{has_sig} = 1;
3751     delete $element_state->{has_significant};
3752     }
3753     $element_state->{phase} = 'in-rb';
3754     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3755     $self->{onerror}->(node => $child_el,
3756 wakaba 1.104 level => $self->{level}->{should},
3757     type => 'no significant content before');
3758 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3759     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3760     $self->{onerror}->(node => $child_el,
3761 wakaba 1.104 level => $self->{level}->{should},
3762     type => 'no significant content before');
3763 wakaba 1.99 #$element_state->{phase} = 'after-rt';
3764     } else {
3765     $self->{onerror}->(node => $child_el,
3766 wakaba 1.104 type => 'element not allowed:ruby base',
3767     level => $self->{level}->{must});
3768 wakaba 1.99 if ($element_state->{has_significant}) {
3769     $element_state->{has_sig} = 1;
3770     delete $element_state->{has_significant};
3771     }
3772     $element_state->{phase} = 'in-rb';
3773     }
3774     } elsif ($element_state->{phase} eq 'after-rp1') {
3775     if ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3776     $element_state->{phase} = 'after-rp-rt';
3777     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3778     $self->{onerror}->(node => $child_el,
3779 wakaba 1.104 type => 'ps element missing',
3780     text => 'rt',
3781     level => $self->{level}->{must});
3782 wakaba 1.99 $element_state->{phase} = 'after-rp2';
3783     } else {
3784     $self->{onerror}->(node => $child_el,
3785 wakaba 1.104 type => 'ps element missing',
3786     text => 'rt',
3787     level => $self->{level}->{must});
3788 wakaba 1.99 $self->{onerror}->(node => $child_el,
3789 wakaba 1.104 type => 'ps element missing',
3790     text => 'rp',
3791     level => $self->{level}->{must});
3792 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3793     $self->{onerror}->(node => $child_el,
3794 wakaba 1.104 type => 'element not allowed:ruby base',
3795     level => $self->{level}->{must});
3796 wakaba 1.99 }
3797     if ($element_state->{has_significant}) {
3798     $element_state->{has_sig} = 1;
3799     delete $element_state->{has_significant};
3800     }
3801     $element_state->{phase} = 'in-rb';
3802     }
3803     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3804     if ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3805     $element_state->{phase} = 'after-rp2';
3806     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3807     $self->{onerror}->(node => $child_el,
3808 wakaba 1.104 type => 'ps element missing',
3809     text => 'rp',
3810     level => $self->{level}->{must});
3811 wakaba 1.99 $self->{onerror}->(node => $child_el,
3812 wakaba 1.104 level => $self->{level}->{should},
3813     type => 'no significant content before');
3814 wakaba 1.99 $element_state->{phase} = 'after-rt';
3815     } else {
3816     $self->{onerror}->(node => $child_el,
3817 wakaba 1.104 type => 'ps element missing',
3818     text => 'rp',
3819     level => $self->{level}->{must});
3820 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3821     $self->{onerror}->(node => $child_el,
3822 wakaba 1.104 type => 'element not allowed:ruby base',
3823     level => $self->{level}->{must});
3824 wakaba 1.99 }
3825     if ($element_state->{has_significant}) {
3826     $element_state->{has_sig} = 1;
3827     delete $element_state->{has_significant};
3828     }
3829     $element_state->{phase} = 'in-rb';
3830     }
3831     } elsif ($element_state->{phase} eq 'after-rp2') {
3832     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3833     if ($element_state->{has_significant}) {
3834     $element_state->{has_sig} = 1;
3835     delete $element_state->{has_significant};
3836     }
3837     $element_state->{phase} = 'in-rb';
3838     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3839     $self->{onerror}->(node => $child_el,
3840 wakaba 1.104 level => $self->{level}->{should},
3841     type => 'no significant content before');
3842 wakaba 1.99 $element_state->{phase} = 'after-rt';
3843     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3844     $self->{onerror}->(node => $child_el,
3845 wakaba 1.104 level => $self->{level}->{should},
3846     type => 'no significant content before');
3847 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3848     } else {
3849     $self->{onerror}->(node => $child_el,
3850 wakaba 1.104 type => 'element not allowed:ruby base',
3851     level => $self->{level}->{must});
3852 wakaba 1.99 if ($element_state->{has_significant}) {
3853     $element_state->{has_sig} = 1;
3854     delete $element_state->{has_significant};
3855     }
3856     $element_state->{phase} = 'in-rb';
3857     }
3858     } else {
3859     die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
3860     }
3861     },
## |ruby| text-child handler.
##
## Arguments: ($self, $item, $child_node, $has_significant, $element_state).
## Only text flagged by the caller as significant ($has_significant) has any
## effect.  Such text always moves the phase to 'in-rb'.  When it arrives in
## the 'after-rp1' or 'after-rp-rt' phase, the elements still expected
## (|rt| and/or |rp|) are first reported as 'ps element missing' at the must
## level.  An unrecognized phase value is treated as an internal error (die).
3862     check_child_text => sub {
3863     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3864     if ($has_significant) {
3865     if ($element_state->{phase} eq 'before-rb') {
3866     $element_state->{phase} = 'in-rb';
3867     } elsif ($element_state->{phase} eq 'in-rb') {
3868     #
3869     } elsif ($element_state->{phase} eq 'after-rt' or
3870     $element_state->{phase} eq 'after-rp2') {
3871     $element_state->{phase} = 'in-rb';
3872     } elsif ($element_state->{phase} eq 'after-rp1') {
## Text directly after the first |rp|: both |rt| and the second |rp|
## are missing.
3873     $self->{onerror}->(node => $child_node,
3874 wakaba 1.104 type => 'ps element missing',
3875     text => 'rt',
3876     level => $self->{level}->{must});
3877 wakaba 1.99 $self->{onerror}->(node => $child_node,
3878 wakaba 1.104 type => 'ps element missing',
3879     text => 'rp',
3880     level => $self->{level}->{must});
3881 wakaba 1.99 $element_state->{phase} = 'in-rb';
3882     } elsif ($element_state->{phase} eq 'after-rp-rt') {
## Text after |rp| |rt|: only the second |rp| is missing.
3883     $self->{onerror}->(node => $child_node,
3884 wakaba 1.104 type => 'ps element missing',
3885     text => 'rp',
3886     level => $self->{level}->{must});
3887 wakaba 1.99 $element_state->{phase} = 'in-rb';
3888     } else {
3889     die "check_child_text: Bad |ruby| phase: $element_state->{phase}";
3890     }
3891     }
3892     },
## |ruby| end-of-element handler.
##
## Arguments: ($self, $item, $element_state).
## Reports what is still missing according to the phase reached when the
## element ends:
##   before-rb    - no content at all: 'no significant content' (should)
##                  and a missing |rt| (must).
##   in-rb        - a base without annotation: missing |rt| (must), plus
##                  'no significant content at the end' (should) if the
##                  base had no significant content.
##   after-rt,
##   after-rp2    - complete; nothing to report.
##   after-rp1    - missing |rt| and |rp| (must).
##   after-rp-rt  - missing the second |rp| (must).
## Any other phase value is an internal error (die).
## Finally the significant-content flag is propagated to the real parent.
3893     check_end => sub {
3894     my ($self, $item, $element_state) = @_;
3895     $self->_remove_minus_elements ($element_state);
3896    
3897     if ($element_state->{phase} eq 'before-rb') {
3898     $self->{onerror}->(node => $item->{node},
3899 wakaba 1.104 level => $self->{level}->{should},
3900 wakaba 1.99 type => 'no significant content');
3901     $self->{onerror}->(node => $item->{node},
3902 wakaba 1.104 type => 'element missing',
3903     text => 'rt',
3904     level => $self->{level}->{must});
3905 wakaba 1.99 } elsif ($element_state->{phase} eq 'in-rb') {
3906     unless ($element_state->{has_significant}) {
3907     $self->{onerror}->(node => $item->{node},
3908 wakaba 1.104 level => $self->{level}->{should},
3909     type => 'no significant content at the end');
3910 wakaba 1.99 }
3911     $self->{onerror}->(node => $item->{node},
3912 wakaba 1.104 type => 'element missing',
3913     text => 'rt',
3914     level => $self->{level}->{must});
3915 wakaba 1.99 } elsif ($element_state->{phase} eq 'after-rt' or
3916     $element_state->{phase} eq 'after-rp2') {
3917     #
3918     } elsif ($element_state->{phase} eq 'after-rp1') {
3919     $self->{onerror}->(node => $item->{node},
3920 wakaba 1.104 type => 'element missing',
3921     text => 'rt',
3922     level => $self->{level}->{must});
3923 wakaba 1.99 $self->{onerror}->(node => $item->{node},
3924 wakaba 1.104 type => 'element missing',
3925     text => 'rp',
3926     level => $self->{level}->{must});
3927 wakaba 1.99 } elsif ($element_state->{phase} eq 'after-rp-rt') {
3928     $self->{onerror}->(node => $item->{node},
3929 wakaba 1.104 type => 'element missing',
3930     text => 'rp',
3931     level => $self->{level}->{must});
3932 wakaba 1.99 } else {
## The message names this handler (|check_end|) so that an internal
## error is not mistaken for one raised by |check_child_text|.
3933     die "check_end: Bad |ruby| phase: $element_state->{phase}";
3934     }
3935    
3936     ## NOTE: A modified version of |check_end| of %AnyChecker.
## |has_sig| is set by |check_child_element| when |has_significant| is
## cleared at the start of a new ruby base, so either flag counts.
3937     if ($element_state->{has_significant} or $element_state->{has_sig}) {
3938     $item->{real_parent_state}->{has_significant} = 1;
3939     }
3940     },
3941     };
3942    
## |rt| element definition: inherits every handler from
## %HTMLPhrasingContentChecker and only overrides the status flags and the
## attribute checker.  No element-specific attribute checkers are given
## (first argument is empty); the second argument supplies per-attribute
## status flags.
3943     $Element->{$HTML_NS}->{rt} = {
3944     %HTMLPhrasingContentChecker,
3945     status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
3946     check_attrs => $GetHTMLAttrsChecker->({}, {
3947     %HTMLAttrStatus,
3948     %HTMLM12NXHTML2CommonAttrStatus,
3949 wakaba 1.153 lang => FEATURE_HTML5_WD,
3950 wakaba 1.99 }),
3951     };
3952    
## |rp| element definition: based on %HTMLTextChecker, with handlers that
## collect the element's significant text and verify at the end that it is
## exactly one punctuation character of the expected class.
3953     $Element->{$HTML_NS}->{rp} = {
3954     %HTMLTextChecker,
3955     status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
3956     check_attrs => $GetHTMLAttrsChecker->({}, {
3957     %HTMLAttrStatus,
3958     %HTMLM12NXHTML2CommonAttrStatus,
3959 wakaba 1.153 lang => FEATURE_HTML5_WD,
3960 wakaba 1.99 }),
## Resets the text accumulator and marks the |template| and |ref|
## attributes as resource URIs in |uri_info|.
3961 wakaba 1.100 check_start => sub {
3962 wakaba 1.99 my ($self, $item, $element_state) = @_;
3963     $element_state->{text} = '';
3964 wakaba 1.100
3965     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3966     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3967 wakaba 1.99 },
## Appends the data of each significant text child to the accumulator;
## text not flagged as significant is ignored.
3968     check_child_text => sub {
3969     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3970     if ($has_significant) {
3971     $element_state->{text} .= $child_node->data;
3972     ## NOTE: |<rp> <!---->(</rp>| is allowed.
3973     }
3974     },
## Validates the accumulated text, then delegates to the |check_end| of
## %HTMLTextChecker.
3975     check_end => sub {
3976     my ($self, $item, $element_state) = @_;
3977    
## Expected class: close punctuation (\p{Pe}) when the parent's phase
## is 'after-rp2' (the value the |ruby| checker assigns on seeing the
## second |rp| of a group), open punctuation (\p{Ps}) otherwise,
## including when there is no parent state or no phase.
3978     my $p_class = ($item->{parent_state} and
3979     $item->{parent_state}->{phase} and
3980     $item->{parent_state}->{phase} eq 'after-rp2')
3981     ? qr/\p{Pe}/ : qr/\p{Ps}/;
3982     if ($element_state->{text} =~ /\A$p_class\z/) {
3983 wakaba 1.132 #=~ /\A[\x09\x0A\x0C\x0D\x20]*${p_class}[\x09\x0A\x0C\x0D\x20]*\z/) {
3984 wakaba 1.99 #
3985     } else {
3986     $self->{onerror}->(node => $item->{node},
3987 wakaba 1.104 type => 'rp:syntax error',
3988     level => $self->{level}->{must});
3989 wakaba 1.99 }
3990    
3991     $HTMLTextChecker{check_end}->(@_);
3992     },
3993     };
3994    
3995 wakaba 1.29 =pod
3996    
3997     ## TODO:
3998    
3999     +
4000     + <p>Partly because of the confusion described above, authors are
4001     + strongly recommended to always mark up all paragraphs with the
4002     + <code>p</code> element, and to not have any <code>ins</code> or
4003     + <code>del</code> elements that cross across any <span
4004     + title="paragraph">implied paragraphs</span>.</p>
4005     +
4006     (An informative note)
4007    
4008     <p><code>ins</code> elements should not cross <span
4009     + title="paragraph">implied paragraph</span> boundaries.</p>
4010     (normative)
4011    
4012     + <p><code>del</code> elements should not cross <span
4013     + title="paragraph">implied paragraph</span> boundaries.</p>
4014     (normative)
4015    
4016     =cut
4017    
## |ins| element definition: based on %HTMLTransparentChecker.  |cite| is
## checked as a URI and |datetime| with the date-time checker configured
## for 'global_date_and_time_string'.
4018 wakaba 1.1 $Element->{$HTML_NS}->{ins} = {
4019 wakaba 1.40 %HTMLTransparentChecker,
4020 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4021 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4022 wakaba 1.1 cite => $HTMLURIAttrChecker,
4023 wakaba 1.168 datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
4024 wakaba 1.49 }, {
4025     %HTMLAttrStatus,
4026     %HTMLM12NCommonAttrStatus,
4027 wakaba 1.50 cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4028 wakaba 1.153 datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4029     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
4030 wakaba 1.1 }),
## Classifies the URI-valued attributes in |uri_info|: |cite| as a
## citation, |template| and |ref| as resources.
4031 wakaba 1.66 check_start => sub {
4032     my ($self, $item, $element_state) = @_;
4033    
4034     $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
4035 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4036     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4037 wakaba 1.66 },
4038 wakaba 1.1 };
4039    
## |del| element definition: same attribute rules as |ins|, but with its
## own |check_end| so that significant content inside |del| is not
## propagated to the parent.
4040     $Element->{$HTML_NS}->{del} = {
4041 wakaba 1.40 %HTMLTransparentChecker,
4042 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4043 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4044 wakaba 1.1 cite => $HTMLURIAttrChecker,
4045 wakaba 1.168 datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
4046 wakaba 1.49 }, {
4047     %HTMLAttrStatus,
4048     %HTMLM12NCommonAttrStatus,
4049 wakaba 1.50 cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4050 wakaba 1.153 datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4051     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
4052 wakaba 1.1 }),
## Reports 'no significant content' (should) only when the element has
## no significant content and $item->{transparent} is false.  Unlike
## other checkers in this file, nothing is written to the parent state.
4053 wakaba 1.40 check_end => sub {
4054     my ($self, $item, $element_state) = @_;
4055     if ($element_state->{has_significant}) {
4056     ## NOTE: Significantness flag does not propagate.
4057     } elsif ($item->{transparent}) {
4058     #
4059     } else {
4060     $self->{onerror}->(node => $item->{node},
4061 wakaba 1.104 level => $self->{level}->{should},
4062 wakaba 1.40 type => 'no significant content');
4063     }
4064 wakaba 1.1 },
## Classifies the URI-valued attributes in |uri_info|: |cite| as a
## citation, |template| and |ref| as resources.
4065 wakaba 1.66 check_start => sub {
4066     my ($self, $item, $element_state) = @_;
4067    
4068     $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
4069 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4070     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4071 wakaba 1.66 },
4072 wakaba 1.1 };
4073    
## |figure| element definition: flow content with an optional |legend|
## that must be either the first or the last child.
##
## State kept in $element_state:
##   has_legend_at_first - a |legend| was seen before any other content.
##   has_legend          - the most recent |legend| seen after other
##                         content (the node itself, used for reporting).
##   has_non_legend      - non-|legend| content was seen since the last
##                         |legend|.
4074 wakaba 1.35 $Element->{$HTML_NS}->{figure} = {
4075 wakaba 1.72 %HTMLFlowContentChecker,
4076 wakaba 1.153 status => FEATURE_HTML5_WD,
4077 wakaba 1.72 ## NOTE: legend, Flow | Flow, legend?
4078 wakaba 1.41 check_child_element => sub {
4079     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4080     $child_is_transparent, $element_state) = @_;
4081 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
4082     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
4083 wakaba 1.41 $self->{onerror}->(node => $child_el,
4084     type => 'element not allowed:minus',
4085 wakaba 1.104 level => $self->{level}->{must});
4086 wakaba 1.41 $element_state->{has_non_legend} = 1;
4087     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4088     #
4089     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
4090     if ($element_state->{has_legend_at_first}) {
## A leading |legend| already exists: any further |legend| is an error.
4091     $self->{onerror}->(node => $child_el,
4092     type => 'element not allowed:figure legend',
4093 wakaba 1.104 level => $self->{level}->{must});
4094 wakaba 1.41 } elsif ($element_state->{has_legend}) {
## A second non-leading |legend|: the earlier one is reported and the
## new one becomes the candidate trailing |legend|.
4095     $self->{onerror}->(node => $element_state->{has_legend},
4096     type => 'element not allowed:figure legend',
4097 wakaba 1.104 level => $self->{level}->{must});
4098 wakaba 1.41 $element_state->{has_legend} = $child_el;
4099     } elsif ($element_state->{has_non_legend}) {
4100     $element_state->{has_legend} = $child_el;
4101     } else {
4102     $element_state->{has_legend_at_first} = 1;
4103 wakaba 1.35 }
4104 wakaba 1.41 delete $element_state->{has_non_legend};
4105     } else {
4106 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
4107 wakaba 1.43 $element_state->{has_non_legend} = 1 unless $child_is_transparent;
4108 wakaba 1.41 }
4109     },
4110     check_child_text => sub {
4111     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
4112     if ($has_significant) {
4113     $element_state->{has_non_legend} = 1;
4114 wakaba 1.35 }
4115 wakaba 1.170
## NOTE(review): |in_figure| is set here, i.e. only when the element
## has a text child (significant or not), and it is not read anywhere
## in this part of the file.  Confirm whether it was meant to be set
## when the element starts instead.
4116     $element_state->{in_figure} = 1;
4117 wakaba 1.41 },
4118     check_end => sub {
4119     my ($self, $item, $element_state) = @_;
4120 wakaba 1.35
4121 wakaba 1.41 if ($element_state->{has_legend_at_first}) {
4122     #
4123     } elsif ($element_state->{has_legend}) {
## A non-leading |legend| is only acceptable as the last child; content
## seen after it makes it an error.
4124     if ($element_state->{has_non_legend}) {
4125     $self->{onerror}->(node => $element_state->{has_legend},
4126 wakaba 1.35 type => 'element not allowed:figure legend',
4127 wakaba 1.104 level => $self->{level}->{must});
4128 wakaba 1.35 }
4129     }
4130 wakaba 1.41
4131 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
4132 wakaba 1.41 ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
4133 wakaba 1.35 },
4134     };
4135 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
4136 wakaba 1.1
## Placeholder attribute checker for attributes whose value syntax is not
## checked yet: reports 'unknown attribute' on the attribute node at the
## |uncertain| level.  Arguments: ($self, $attr).
4137 wakaba 1.92 my $AttrCheckerNotImplemented = sub {
4138     my ($self, $attr) = @_;
4139 wakaba 1.104 $self->{onerror}->(node => $attr,
4140     type => 'unknown attribute',
4141     level => $self->{level}->{uncertain});
4142 wakaba 1.92 };
4143    
## |img| element definition: an empty element (%HTMLEmptyChecker) whose
## |check_attrs| runs the per-attribute checkers, then reports a missing
## |alt| (should) or |src| (must), and finally classifies the URI-valued
## attributes in |uri_info|.
4144 wakaba 1.1 $Element->{$HTML_NS}->{img} = {
4145 wakaba 1.40 %HTMLEmptyChecker,
4146 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4147 wakaba 1.40 check_attrs => sub {
4148     my ($self, $item, $element_state) = @_;
4149 wakaba 1.1 $GetHTMLAttrsChecker->({
4150 wakaba 1.70 align => $GetHTMLEnumeratedAttrChecker->({
4151     bottom => 1, middle => 1, top => 1, left => 1, right => 1,
4152     }),
4153 wakaba 1.1 alt => sub { }, ## NOTE: No syntactical requirement
4154 wakaba 1.70 border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4155 wakaba 1.1 src => $HTMLURIAttrChecker,
4156     usemap => $HTMLUsemapAttrChecker,
4157 wakaba 1.70 hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
## |ismap| is reported as not allowed unless the |in_a_href| flag is
## set; the boolean-attribute syntax check runs in either case.
4158 wakaba 1.1 ismap => sub {
4159 wakaba 1.40 my ($self, $attr, $parent_item) = @_;
4160     if (not $self->{flag}->{in_a_href}) {
4161 wakaba 1.15 $self->{onerror}->(node => $attr,
4162 wakaba 1.59 type => 'attribute not allowed:ismap',
4163 wakaba 1.104 level => $self->{level}->{must});
4164 wakaba 1.1 }
4165 wakaba 1.40 $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
4166 wakaba 1.1 },
4167 wakaba 1.70 longdesc => $HTMLURIAttrChecker,
4168     ## TODO: HTML4 |name|
4169 wakaba 1.92 height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
4170 wakaba 1.70 vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4171 wakaba 1.92 width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
4172 wakaba 1.49 }, {
4173     %HTMLAttrStatus,
4174 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
4175 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
4176 wakaba 1.153 alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4177 wakaba 1.49 border => FEATURE_M12N10_REC_DEPRECATED,
4178 wakaba 1.153 height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4179 wakaba 1.49 hspace => FEATURE_M12N10_REC_DEPRECATED,
4180 wakaba 1.153 ismap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4181     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
4182 wakaba 1.49 longdesc => FEATURE_M12N10_REC,
4183     name => FEATURE_M12N10_REC_DEPRECATED,
4184 wakaba 1.61 sdapref => FEATURE_HTML20_RFC,
4185 wakaba 1.153 src => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4186     usemap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4187 wakaba 1.49 vspace => FEATURE_M12N10_REC_DEPRECATED,
4188 wakaba 1.153 width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4189 wakaba 1.66 })->($self, $item, $element_state);
## Required-attribute checks: a missing |alt| is a should-level
## problem, a missing |src| a must-level one.
4190 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'alt')) {
4191     $self->{onerror}->(node => $item->{node},
4192 wakaba 1.104 type => 'attribute missing',
4193     text => 'alt',
4194     level => $self->{level}->{should});
4195 wakaba 1.114 ## TODO: ...
4196 wakaba 1.1 }
4197 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'src')) {
4198     $self->{onerror}->(node => $item->{node},
4199 wakaba 1.104 type => 'attribute missing',
4200     text => 'src',
4201     level => $self->{level}->{must});
4202 wakaba 1.1 }
4203 wakaba 1.66
4204 wakaba 1.114 ## TODO: external resource check
4205    
## |lowsrc| and |dynsrc| are classified here although no checker or
## status entry for them appears in the tables above.
4206 wakaba 1.66 $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4207     $element_state->{uri_info}->{lowsrc}->{type}->{embedded} = 1;
4208     $element_state->{uri_info}->{dynsrc}->{type}->{embedded} = 1;
4209     $element_state->{uri_info}->{longdesc}->{type}->{cite} = 1;
4210 wakaba 1.1 },
4211     };
4212    
## |iframe| element definition: based on %HTMLTextChecker.  |sandbox| is
## checked as an unordered set of unique space-separated tokens drawn from
## 'allow-same-origin', 'allow-forms' and 'allow-scripts'.
4213     $Element->{$HTML_NS}->{iframe} = {
4214 wakaba 1.40 %HTMLTextChecker,
4215 wakaba 1.114 status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4216 wakaba 1.49 ## NOTE: Not part of M12N10 Strict
4217 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4218 wakaba 1.92 height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
4219 wakaba 1.76 name => $HTMLBrowsingContextNameAttrChecker,
4220 wakaba 1.92 sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
4221     'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
4222     }),
## NOTE(review): the attribute is spelled |seemless| here and in the
## status table below; HTML5 drafts name this attribute |seamless|.
## Confirm the intended spelling before relying on this entry.
4223     seemless => $GetHTMLBooleanAttrChecker->('seemless'),
4224 wakaba 1.1 src => $HTMLURIAttrChecker,
4225 wakaba 1.92 width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
4226 wakaba 1.49 }, {
4227     %HTMLAttrStatus,
4228     %HTMLM12NCommonAttrStatus,
4229     align => FEATURE_XHTML10_REC,
4230 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4231 wakaba 1.49 frameborder => FEATURE_M12N10_REC,
4232 wakaba 1.153 height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4233     id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4234 wakaba 1.49 longdesc => FEATURE_M12N10_REC,
4235     marginheight => FEATURE_M12N10_REC,
4236     marginwidth => FEATURE_M12N10_REC,
4237 wakaba 1.114 #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
4238     name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4239     sandbox => FEATURE_HTML5_WD,
4240 wakaba 1.49 scrolling => FEATURE_M12N10_REC,
4241 wakaba 1.114 seemless => FEATURE_HTML5_WD,
4242     src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4243 wakaba 1.153 title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4244     width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4245 wakaba 1.1 }),
## Classifies |src| as an embedded resource and |template| / |ref| as
## resources in |uri_info|.
4246 wakaba 1.66 check_start => sub {
4247     my ($self, $item, $element_state) = @_;
4248    
4249     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4250 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4251     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4252 wakaba 1.66 },
4253 wakaba 1.40 };
4254    
## |embed| element definition: an empty element whose |check_attrs| walks
## the attributes by hand instead of using $GetHTMLAttrsChecker, because
## almost any attribute in no namespace is acceptable on |embed|.
##
## For each attribute a checker and a status value are selected, the
## checker (if any) is run, and the status is passed to
## |_attr_status_info|.  A missing |src| is reported at the info level.
4255 wakaba 1.1 $Element->{$HTML_NS}->{embed} = {
4256 wakaba 1.40 %HTMLEmptyChecker,
4257 wakaba 1.98 status => FEATURE_HTML5_WD,
4258 wakaba 1.40 check_attrs => sub {
4259     my ($self, $item, $element_state) = @_;
4260 wakaba 1.1 my $has_src;
4261 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
4262 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
4263     $attr_ns = '' unless defined $attr_ns;
4264     my $attr_ln = $attr->manakai_local_name;
4265     my $checker;
4266 wakaba 1.73
## Initial status: looked up by local name in the common status table
## extended with the |embed|-specific attributes; undefined when the
## name is not listed.
4267     my $status = {
4268     %HTMLAttrStatus,
4269 wakaba 1.153 height => FEATURE_HTML5_LC,
4270 wakaba 1.98 src => FEATURE_HTML5_WD,
4271     type => FEATURE_HTML5_WD,
4272 wakaba 1.153 width => FEATURE_HTML5_LC,
4273 wakaba 1.73 }->{$attr_ln};
4274    
## Attributes in no namespace: known names get their dedicated checker;
## lowercase |data-*| names use the dataset checker; any other name that
## is a lowercase NCName not starting with "xml" is accepted with the
## common checker for that name or, failing that, a no-op.
4275 wakaba 1.1 if ($attr_ns eq '') {
4276     if ($attr_ln eq 'src') {
4277     $checker = $HTMLURIAttrChecker;
4278     $has_src = 1;
4279     } elsif ($attr_ln eq 'type') {
4280     $checker = $HTMLIMTAttrChecker;
4281 wakaba 1.92 } elsif ($attr_ln eq 'width' or $attr_ln eq 'height') {
4282     $checker = $AttrCheckerNotImplemented; ## TODO: because spec does not define them yet.
4283 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
4284     $attr_ln !~ /[A-Z]/) {
4285 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
4286     $status = $HTMLDatasetAttrStatus;
4287 wakaba 1.117 } elsif ($attr_ln !~ /^[Xx][Mm][Ll]/ and
4288 wakaba 1.118 $attr_ln !~ /[A-Z]/ and
4289 wakaba 1.117 $attr_ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
4290 wakaba 1.1 $checker = $HTMLAttrChecker->{$attr_ln}
4291     || sub { }; ## NOTE: Any local attribute is ok.
4292 wakaba 1.98 $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
4293 wakaba 1.117 } else {
4294     $checker = $HTMLAttrChecker->{$attr_ln};
4295 wakaba 1.1 }
4296     }
## Fallbacks: the per-namespace tables, first by local name and then by
## the '' wildcard entry.  A namespaced attribute with no status at all
## is treated as allowed.
4297     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
4298 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
4299     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
4300     || $AttrStatus->{$attr_ns}->{''};
4301     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
4302 wakaba 1.62
## With no checker: a no-namespace attribute without status is left
## silent here; anything else is reported as 'unknown attribute' at the
## |uncertain| level.
4303 wakaba 1.1 if ($checker) {
4304 wakaba 1.66 $checker->($self, $attr, $item, $element_state);
4305 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
4306 wakaba 1.54 #
4307 wakaba 1.1 } else {
4308 wakaba 1.104 $self->{onerror}->(node => $attr,
4309     type => 'unknown attribute',
4310     level => $self->{level}->{uncertain});
4311 wakaba 1.50 ## ISSUE: No conformance criteria for global attributes in the spec
4312     }
4313    
4314 wakaba 1.82 $self->_attr_status_info ($attr, $status);
4315 wakaba 1.1 }
4316    
4317     unless ($has_src) {
4318 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4319 wakaba 1.104 type => 'attribute missing',
4320     text => 'src',
4321 wakaba 1.114 level => $self->{level}->{info});
4322     ## NOTE: <embed> without src="" is allowed since revision 1929.
4323     ## We issue an informational message since <embed> w/o src=""
4324     ## is likely an authoring error.
4325 wakaba 1.1 }
4326 wakaba 1.114
4327     ## TODO: external resource check
4328 wakaba 1.66
4329     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4330 wakaba 1.1 },
4331     };
4332    
4333 wakaba 1.49 ## TODO:
4334     ## {applet} FEATURE_M12N10_REC_DEPRECATED
4335     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4336    
4337 wakaba 1.1 $Element->{$HTML_NS}->{object} = {
4338 wakaba 1.40 %HTMLTransparentChecker,
4339 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
## |object| attribute handler.
##
## Arguments: ($self, $item, $element_state).
## Runs the per-attribute checkers, then reports 'attribute
## missing:data|type' (must) when the element has neither a |data| nor a
## |type| attribute, and finally classifies the URI-valued attributes in
## |uri_info|.
4340 wakaba 1.40 check_attrs => sub {
4341     my ($self, $item, $element_state) = @_;
4342 wakaba 1.1 $GetHTMLAttrsChecker->({
4343 wakaba 1.70 align => $GetHTMLEnumeratedAttrChecker->({
4344     bottom => 1, middle => 1, top => 1, left => 1, right => 1,
4345     }),
4346     archive => $HTMLSpaceURIsAttrChecker,
4347     ## TODO: Relative to @codebase
4348     border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4349     classid => $HTMLURIAttrChecker,
4350     codebase => $HTMLURIAttrChecker,
4351     codetype => $HTMLIMTAttrChecker,
4352     ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
4353 wakaba 1.1 data => $HTMLURIAttrChecker,
4354 wakaba 1.70 declare => $GetHTMLBooleanAttrChecker->('declare'),
4355     ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
4356     ## [HTML4] but we don't know how to test this.
4357 wakaba 1.167 form => $HTMLFormAttrChecker,
4358 wakaba 1.70 hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4359 wakaba 1.76 name => $HTMLBrowsingContextNameAttrChecker,
4360 wakaba 1.167 ## NOTE: |name| attribute of the |object| element defines
4361     ## the name of the browsing context created by the element,
4362     ## if any, but is also used as the form control name of the
4363     ## form control provided by the plugin, if any.
4364 wakaba 1.70 standby => sub {}, ## NOTE: %Text; in HTML4
4365 wakaba 1.1 type => $HTMLIMTAttrChecker,
4366     usemap => $HTMLUsemapAttrChecker,
4367 wakaba 1.92 height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
4368 wakaba 1.70 vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4369 wakaba 1.92 width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
4370 wakaba 1.49 }, {
4371     %HTMLAttrStatus,
4372 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
4373 wakaba 1.49 align => FEATURE_XHTML10_REC,
4374 wakaba 1.82 archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4375 wakaba 1.49 border => FEATURE_XHTML10_REC,
4376     classid => FEATURE_M12N10_REC,
4377     codebase => FEATURE_M12N10_REC,
4378     codetype => FEATURE_M12N10_REC,
4379 wakaba 1.82 'content-length' => FEATURE_XHTML2_ED,
4380 wakaba 1.153 data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4381 wakaba 1.49 datafld => FEATURE_HTML4_REC_RESERVED,
4382     dataformatas => FEATURE_HTML4_REC_RESERVED,
4383     datasrc => FEATURE_HTML4_REC_RESERVED,
4384 wakaba 1.82 declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4385 wakaba 1.167 form => FEATURE_HTML5_DEFAULT,
4386 wakaba 1.153 height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4387 wakaba 1.49 hspace => FEATURE_XHTML10_REC,
4388 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
4389     name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4390 wakaba 1.49 standby => FEATURE_M12N10_REC,
4391 wakaba 1.50 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4392 wakaba 1.153 type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4393     usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4394 wakaba 1.49 vspace => FEATURE_XHTML10_REC,
4395 wakaba 1.153 width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4396 wakaba 1.66 })->($self, $item, $element_state);
## At least one of |data| and |type| must be present.
4397 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'data')) {
4398     unless ($item->{node}->has_attribute_ns (undef, 'type')) {
4399     $self->{onerror}->(node => $item->{node},
4400 wakaba 1.104 type => 'attribute missing:data|type',
4401     level => $self->{level}->{must});
4402 wakaba 1.1 }
4403     }
4404 wakaba 1.66
4405     $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
4406     $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
4407     $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
4408     ## TODO: archive
4409     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
4410 wakaba 1.1 },
4411 wakaba 1.72 ## NOTE: param*, transparent (Flow)
## |object| child-element handler: |param| elements must come before any
## other content.
##
## Arguments: ($self, $item, $child_el, $child_nsuri, $child_ln,
## $child_is_transparent, $element_state).
##   - An interactive element listed in |minus_elements| is reported as
##     'element not allowed:minus' (must) and counts as non-|param|
##     content.
##   - An element listed in |plus_elements| is accepted silently.
##   - A |param| seen after non-|param| content is reported as
##     'element not allowed:flow' (must).
##   - Anything else is delegated to the flow content checker and counts
##     as non-|param| content.
4412 wakaba 1.41 check_child_element => sub {
4413     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4414     $child_is_transparent, $element_state) = @_;
4415 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
4416     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
4417 wakaba 1.41 $self->{onerror}->(node => $child_el,
4418     type => 'element not allowed:minus',
4419 wakaba 1.104 level => $self->{level}->{must});
## The flag recorded here must be |has_non_param|, the one the |param|
## branch below reads; |has_non_legend| belongs to |figure| and is never
## consulted for |object|.
4420 wakaba 1.41 $element_state->{has_non_param} = 1;
4421     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4422     #
4423     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
4424     if ($element_state->{has_non_param}) {
4425 wakaba 1.104 $self->{onerror}->(node => $child_el,
4426 wakaba 1.72 type => 'element not allowed:flow',
4427 wakaba 1.104 level => $self->{level}->{must});
4428 wakaba 1.39 }
4429 wakaba 1.41 } else {
4430 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
4431 wakaba 1.41 $element_state->{has_non_param} = 1;
4432 wakaba 1.39 }
4433 wakaba 1.25 },
## |object| text-child handler: significant text counts as non-|param|
## content, so a later |param| child is reported as out of place.
4434 wakaba 1.41 check_child_text => sub {
4435     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
4436     if ($has_significant) {
4437     $element_state->{has_non_param} = 1;
4438     }
4439 wakaba 1.42 },
## |object| end-of-element handler.
##
## Arguments: ($self, $item, $element_state).
## Significant content is propagated to the real parent's state.  Without
## significant content, 'no significant content' (should) is reported only
## when the node has no parent element.
4440     check_end => sub {
4441     my ($self, $item, $element_state) = @_;
4442     if ($element_state->{has_significant}) {
4443 wakaba 1.46 $item->{real_parent_state}->{has_significant} = 1;
4444 wakaba 1.42 } elsif ($item->{node}->manakai_parent_element) {
4445     ## NOTE: Transparent.
4446     } else {
4447     $self->{onerror}->(node => $item->{node},
4448 wakaba 1.104 level => $self->{level}->{should},
4449 wakaba 1.42 type => 'no significant content');
4450     }
4451     },
4452 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
4453 wakaba 1.1 };
4454 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4455     ## What about |<section><object data><style scoped></style>x</object></section>|?
4456     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4457 wakaba 1.1
## |param| element checker.  Spreads in %HTMLEmptyChecker (defined
## elsewhere in this file) and overrides |status| and |check_attrs|.
4458     $Element->{$HTML_NS}->{param} = {
4459 wakaba 1.40 %HTMLEmptyChecker,
4460 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
## check_attrs: run the per-attribute checkers first, then report
## 'attribute missing' (must level) if |name| or |value| is absent.
4461 wakaba 1.40 check_attrs => sub {
4462     my ($self, $item, $element_state) = @_;
4463 wakaba 1.1 $GetHTMLAttrsChecker->({
## |name| and |value| are given empty checkers: their values are not
## validated here.
4464     name => sub { },
4465 wakaba 1.70 type => $HTMLIMTAttrChecker,
4466 wakaba 1.1 value => sub { },
4467 wakaba 1.70 valuetype => $GetHTMLEnumeratedAttrChecker->({
4468     data => 1, ref => 1, object => 1,
4469     }),
4470 wakaba 1.49 }, {
## Second argument: per-attribute feature status flags.
4471     %HTMLAttrStatus,
4472 wakaba 1.154 href => FEATURE_RDFA_REC,
4473 wakaba 1.153 id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4474     name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4475 wakaba 1.82 type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4476 wakaba 1.153 value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4477 wakaba 1.82 valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4478 wakaba 1.66 })->(@_);
## Required attributes (looked up in the null namespace).
4479 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'name')) {
4480     $self->{onerror}->(node => $item->{node},
4481 wakaba 1.104 type => 'attribute missing',
4482     text => 'name',
4483     level => $self->{level}->{must});
4484 wakaba 1.1 }
4485 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'value')) {
4486     $self->{onerror}->(node => $item->{node},
4487 wakaba 1.104 type => 'attribute missing',
4488     text => 'value',
4489     level => $self->{level}->{must});
4490 wakaba 1.1 }
4491     },
4492     };
4493    
## |video| element checker.  Spreads in %HTMLTransparentChecker (defined
## elsewhere in this file) and overrides the callbacks below.
##
## Per-element state used by the callbacks:
##   {allow_source} - true only while |source| children are still
##     permitted: set in |check_start| when there is no |src| attribute
##     and deleted as soon as a minus element, any other checked child
##     element or significant text is seen.
##   {has_source} - |-1| when there is no |src| attribute and no |source|
##     child has been seen yet (i.e. one is still required), |1| once a
##     |source| child has been seen, otherwise a false value.
4494     $Element->{$HTML_NS}->{video} = {
4495 wakaba 1.40 %HTMLTransparentChecker,
4496 wakaba 1.48 status => FEATURE_HTML5_LC,
4497 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4498 wakaba 1.1 src => $HTMLURIAttrChecker,
4499     ## TODO: start, loopstart, loopend, end
4500     ## ISSUE: they MUST be "value time offset"s. Value?
4501 wakaba 1.11 ## ISSUE: playcount has no conformance criteria
4502 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
4503     controls => $GetHTMLBooleanAttrChecker->('controls'),
4504 wakaba 1.59 poster => $HTMLURIAttrChecker,
4505 wakaba 1.92 height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
4506     width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
4507 wakaba 1.50 }, {
## Second argument: per-attribute feature status flags.
4508     %HTMLAttrStatus,
4509     autoplay => FEATURE_HTML5_LC,
4510     controls => FEATURE_HTML5_LC,
4511 wakaba 1.153 end => FEATURE_HTML5_AT_RISK,
4512 wakaba 1.50 height => FEATURE_HTML5_LC,
4513 wakaba 1.153 loopend => FEATURE_HTML5_AT_RISK,
4514     loopstart => FEATURE_HTML5_AT_RISK,
4515     playcount => FEATURE_HTML5_AT_RISK,
4516 wakaba 1.50 poster => FEATURE_HTML5_LC,
4517     src => FEATURE_HTML5_LC,
4518 wakaba 1.153 start => FEATURE_HTML5_AT_RISK,
4519 wakaba 1.50 width => FEATURE_HTML5_LC,
4520 wakaba 1.1 }),
## check_start: initialise {allow_source} / {has_source} from the
## presence of the |src| attribute and record URL attribute kinds.
4521 wakaba 1.42 check_start => sub {
4522     my ($self, $item, $element_state) = @_;
4523     $element_state->{allow_source}
4524     = not $item->{node}->has_attribute_ns (undef, 'src');
## |||=| keeps an already-true {has_source}; otherwise it becomes |-1|
## when {allow_source} is true and a false value when it is not.
4525     $element_state->{has_source} ||= $element_state->{allow_source} * -1;
4526     ## NOTE: It might be set true by |check_element|.
4527 wakaba 1.66 
4528     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4529     $element_state->{uri_info}->{poster}->{type}->{embedded} = 1;
4530 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4531     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4532 wakaba 1.42 },
## check_child_element: |source| children are accepted only while
## {allow_source} is still true; every other child (except plus
## elements) ends the |source| run and, unless it is a rejected minus
## element, is checked as flow content.
4533     check_child_element => sub {
4534     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4535     $child_is_transparent, $element_state) = @_;
4536 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
4537     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
4538 wakaba 1.42 $self->{onerror}->(node => $child_el,
4539     type => 'element not allowed:minus',
4540 wakaba 1.104 level => $self->{level}->{must});
4541 wakaba 1.42 delete $element_state->{allow_source};
4542     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4543     #
4544     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
4545 wakaba 1.45 unless ($element_state->{allow_source}) {
4546 wakaba 1.104 $self->{onerror}->(node => $child_el,
4547 wakaba 1.72 type => 'element not allowed:flow',
4548 wakaba 1.104 level => $self->{level}->{must});
4549 wakaba 1.42 }
## Recorded even when the |source| was misplaced, so |check_end| does
## not additionally report a missing |source| child.
4550 wakaba 1.45 $element_state->{has_source} = 1;
4551 wakaba 1.1 } else {
4552 wakaba 1.42 delete $element_state->{allow_source};
4553 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
4554 wakaba 1.42 }
4555     },
## check_child_text: significant text also ends the |source| run; the
## text itself is then checked by the flow content checker.
4556     check_child_text => sub {
4557     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
4558     if ($has_significant) {
4559     delete $element_state->{allow_source};
4560     }
4561 wakaba 1.72 $HTMLFlowContentChecker{check_child_text}->(@_);
4562 wakaba 1.42 },
## check_end: report a missing |source| child when one was required
## ({has_source} still |-1|), then reuse the |object| element's
## |check_end|.
4563     check_end => sub {
4564     my ($self, $item, $element_state) = @_;
4565     if ($element_state->{has_source} == -1) {
4566     $self->{onerror}->(node => $item->{node},
4567 wakaba 1.104 type => 'child element missing',
4568     text => 'source',
4569     level => $self->{level}->{must});
4570 wakaba 1.1 }
4571 wakaba 1.42 
4572     $Element->{$HTML_NS}->{object}->{check_end}->(@_);
4573 wakaba 1.1 },
4574     };
4575    
## |audio| element checker.  Copies every entry of the |video| checker
## and overrides only |status| and |check_attrs|; the attribute tables
## here omit |poster|, |height| and |width|.
4576     $Element->{$HTML_NS}->{audio} = {
4577 wakaba 1.40 %{$Element->{$HTML_NS}->{video}},
4578 wakaba 1.48 status => FEATURE_HTML5_LC,
4579 wakaba 1.42 check_attrs => $GetHTMLAttrsChecker->({
4580     src => $HTMLURIAttrChecker,
4581     ## TODO: start, loopstart, loopend, end
4582     ## ISSUE: they MUST be "value time offset"s. Value?
4583     ## ISSUE: playcount has no conformance criteria
4584     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
4585     controls => $GetHTMLBooleanAttrChecker->('controls'),
4586 wakaba 1.50 }, {
## Second argument: per-attribute feature status flags.
4587     %HTMLAttrStatus,
4588     autoplay => FEATURE_HTML5_LC,
4589     controls => FEATURE_HTML5_LC,
4590 wakaba 1.153 end => FEATURE_HTML5_AT_RISK,
4591     loopend => FEATURE_HTML5_AT_RISK,
4592     loopstart => FEATURE_HTML5_AT_RISK,
4593     playcount => FEATURE_HTML5_AT_RISK,
4594 wakaba 1.50 src => FEATURE_HTML5_LC,
4595 wakaba 1.153 start => FEATURE_HTML5_AT_RISK,
4596 wakaba 1.42 }),
4597 wakaba 1.1 };
4598    
## |source| element checker.  Spreads in %HTMLEmptyChecker (defined
## elsewhere in this file) and overrides |status| and |check_attrs|.
4599     $Element->{$HTML_NS}->{source} = {
4600 wakaba 1.40 %HTMLEmptyChecker,
4601 wakaba 1.153 status => FEATURE_HTML5_LC,
## check_attrs: run the per-attribute checkers, require |src| (must
## level) and mark |src| as an embedded-resource URL.
4602 wakaba 1.40 check_attrs => sub {
4603     my ($self, $item, $element_state) = @_;
4604 wakaba 1.1 $GetHTMLAttrsChecker->({
4605 wakaba 1.90 media => $HTMLMQAttrChecker,
## The callback passed for |pixelratio| always returns true.
4606     pixelratio => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
4607     src => $HTMLURIAttrChecker, ## ISSUE (pixelratio, above): Negative or zero pixelratio=""
4608 wakaba 1.1 type => $HTMLIMTAttrChecker,
4609 wakaba 1.50 }, {
## Second argument: per-attribute feature status flags.
4610     %HTMLAttrStatus,
4611 wakaba 1.153 media => FEATURE_HTML5_LC,
4612     pixelratio => FEATURE_HTML5_LC,
4613     src => FEATURE_HTML5_LC,
4614     type => FEATURE_HTML5_LC,
4615 wakaba 1.66 })->(@_);
4616 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'src')) {
4617     $self->{onerror}->(node => $item->{node},
4618 wakaba 1.104 type => 'attribute missing',
4619     text => 'src',
4620     level => $self->{level}->{must});
4621 wakaba 1.1 }
4622 wakaba 1.66 
4623     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4624 wakaba 1.1 },
4625     };
4626    
## |canvas| element checker.  Spreads in %HTMLTransparentChecker (defined
## elsewhere in this file) and overrides |status| and |check_attrs|.
## |height| and |width| use the non-negative integer checker with a
## callback that always returns true.
4627     $Element->{$HTML_NS}->{canvas} = {
4628 wakaba 1.40 %HTMLTransparentChecker,
4629 wakaba 1.89 status => FEATURE_HTML5_COMPLETE,
4630 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4631 wakaba 1.1 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4632     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4633 wakaba 1.50 }, {
## Second argument: per-attribute feature status flags.
4634     %HTMLAttrStatus,
4635 wakaba 1.89 height => FEATURE_HTML5_COMPLETE,
4636     width => FEATURE_HTML5_COMPLETE,
4637 wakaba 1.1 }),
4638     };
4639    
## |map| element checker.  Spreads in %HTMLFlowContentChecker (defined
## elsewhere in this file) and overrides |status|, |check_attrs|,
## |check_start| and |check_end|.
4640     $Element->{$HTML_NS}->{map} = {
4641 wakaba 1.72 %HTMLFlowContentChecker,
4642 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
## check_attrs: validate |name| (required, must not be empty) and, when
## an |id| attribute is also present, require it to equal |name|.
4643 wakaba 1.40 check_attrs => sub {
4644     my ($self, $item, $element_state) = @_;
## Set by the |name| checker below to [value, attribute node]; stays
## undefined when the element has no |name| attribute.
4645 wakaba 1.100 my $has_name;
4646 wakaba 1.4 $GetHTMLAttrsChecker->({
4647 wakaba 1.100 name => sub {
4648     my ($self, $attr) = @_;
4649     my $value = $attr->value;
4650     if (length $value) {
4651     ## NOTE: Duplication is not non-conforming.
4652     ## NOTE: Space characters are not non-conforming.
4653     #
4654     } else {
4655     $self->{onerror}->(node => $attr,
4656     type => 'empty attribute value',
4657 wakaba 1.104 level => $self->{level}->{must});
4658 wakaba 1.100 }
## Remember the first |name| attribute node seen for each value (the
## empty value included).
4659 wakaba 1.4 $self->{map}->{$value} ||= $attr;
4660 wakaba 1.100 $has_name = [$value, $attr];
4661 wakaba 1.4 },
4662 wakaba 1.49 }, {
## Second argument: per-attribute feature status flags.
4663     %HTMLAttrStatus,
4664 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4665     dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4666     id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4667     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
4668     #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
4669     name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4670 wakaba 1.50 onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4671     ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4672     onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4673     onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4674     onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4675     onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4676     onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4677     onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4678     onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4679     onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4680 wakaba 1.153 title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4681 wakaba 1.66 })->(@_);
4682 wakaba 1.100 
## With a |name|: an |id| that differs from it is reported on the |id|
## attribute node.  Without a |name|: report the missing attribute.
4683 wakaba 1.135 if ($has_name) {
4684 wakaba 1.145 my $id = $item->{node}->get_attribute_ns (undef, 'id');
4685 wakaba 1.135 if (defined $id and $has_name->[0] ne $id) {
4686 wakaba 1.155 $self->{onerror}
4687     ->(node => $item->{node}->get_attribute_node_ns (undef, 'id'),
4688     type => 'id ne name',
4689     level => $self->{level}->{must});
4690 wakaba 1.100 }
4691 wakaba 1.135 } else {
4692 wakaba 1.100 $self->{onerror}->(node => $item->{node},
4693 wakaba 1.104 type => 'attribute missing',
4694     text => 'name',
4695     level => $self->{level}->{must});
4696 wakaba 1.100 }
4697 wakaba 1.4 },
## check_start: save the current |{in_map}| value and replace it with a
## new array that has one more (empty) hash appended for this |map|.
4698 wakaba 1.59 check_start => sub {
4699     my ($self, $item, $element_state) = @_;
4700     $element_state->{in_map_original} = $self->{flag}->{in_map};
4701 wakaba 1.137 $self->{flag}->{in_map} = [@{$self->{flag}->{in_map} or []}, {}];
4702     ## NOTE: |{in_map}| is a reference to the array which contains
4703     ## hash references. Hashes are corresponding to the opening
4704     ## |map| elements and each of them contains the key-value
4705     ## pairs corresponding to the absolute URLs for the processed
4706     ## |area| elements in the |map| element corresponding to the
4707     ## hash. The key represents the resource (## TODO: use
4708     ## absolute URL), while the value represents whether there is
4709     ## an |area| element whose |alt| attribute is specified to a
4710     ## non-empty value. If there IS such an |area| element for
4711     ## the resource specified by the key, then the value is set to
4712     ## zero (|0|). Otherwise, if there is no such an |area|
4713     ## element but there is any |area| element with the empty
4714     ## |alt=""| attribute, then the value contains an array
4715     ## reference that contains all of such |area| elements.
4716 wakaba 1.79 
4717     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4718     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4719 wakaba 1.59 },
## check_end: for this |map|'s hash (the last one in |{in_map}|), report
## 'empty area alt' for every node still recorded under a URL; entries
## whose value is false (|0|) are skipped.  Then restore the saved
## |{in_map}| and run the flow content checker's |check_end|.
4720     check_end => sub {
4721     my ($self, $item, $element_state) = @_;
4722 wakaba 1.137 
4723     for (keys %{$self->{flag}->{in_map}->[-1]}) {
4724     my $nodes = $self->{flag}->{in_map}->[-1]->{$_};
4725     next unless $nodes;
4726     for (@$nodes) {
4727     $self->{onerror}->(type => 'empty area alt',
4728     node => $_,
4729     level => $self->{level}->{html5_no_may});
4730     }
4731     }
4732    
4733     $self->{flag}->{in_map} = $element_state->{in_map_original};
4734    
4735 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
4736 wakaba 1.59 },
4737 wakaba 1.1 };
4738    
## |area| element checker.  Spreads in %HTMLEmptyChecker (defined
## elsewhere in this file) and overrides |status|, |check_attrs| and
## |check_start|.
##
## |check_attrs| walks the attributes itself (instead of going through
## $GetHTMLAttrsChecker) because it needs the attribute nodes afterwards
## for three cross-attribute checks: |href| versus |alt| and the
## hyperlink-only attributes, and |shape| versus |coords|.
4739     $Element->{$HTML_NS}->{area} = {
4740 wakaba 1.40 %HTMLEmptyChecker,
4741 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4742 wakaba 1.40 check_attrs => sub {
4743     my ($self, $item, $element_state) = @_;
## %attr: attribute nodes, by local name, of the null-namespace
## attributes that have a checker in the table below.
## $coords: array reference of the comma-separated |coords| items; left
## undefined when |coords| is absent or fails the syntax check.
4744 wakaba 1.1 my %attr;
4745     my $coords;
4746 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
4747 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
4748     $attr_ns = '' unless defined $attr_ns;
4749     my $attr_ln = $attr->manakai_local_name;
4750     my $checker;
4751 wakaba 1.73 my $status;
4752 wakaba 1.1 if ($attr_ns eq '') {
## Feature status flags for this attribute (undefined if not listed).
4753 wakaba 1.73 $status = {
4754     %HTMLAttrStatus,
4755     %HTMLM12NCommonAttrStatus,
4756     accesskey => FEATURE_M12N10_REC,
4757 wakaba 1.153 alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4758     coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4759 wakaba 1.154 href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
4760 wakaba 1.153 hreflang => FEATURE_HTML5_WD,
4761     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
4762 wakaba 1.154 media => FEATURE_HTML5_WD,
4763 wakaba 1.73 nohref => FEATURE_M12N10_REC,
4764     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4765     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4766 wakaba 1.153 ping => FEATURE_HTML5_WD,
4767 wakaba 1.154 rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
4768 wakaba 1.153 shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4769 wakaba 1.73 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4770 wakaba 1.153 target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4771     type => FEATURE_HTML5_WD,
4772 wakaba 1.73 }->{$attr_ln};
4773    
## Element-specific value checkers.
4774 wakaba 1.1 $checker = {
4775 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
4776 wakaba 1.153 alt => sub {
4777     ## NOTE: Checked later.
4778     },
## NOTE(review): the |1| / |-1| values are interpreted by
## $GetHTMLEnumeratedAttrChecker, which is not visible here; presumably
## they distinguish preferred keywords from merely recognized ones -
## confirm against its definition.
4779 wakaba 1.1 shape => $GetHTMLEnumeratedAttrChecker->({
4780     circ => -1, circle => 1,
4781     default => 1,
4782     poly => 1, polygon => -1,
4783     rect => 1, rectangle => -1,
4784     }),
## |coords| must be one or more optionally negative decimal integers
## separated by single commas, with no white space.
4785     coords => sub {
4786     my ($self, $attr) = @_;
4787     my $value = $attr->value;
4788     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
4789     $coords = [split /,/, $value];
4790     } else {
4791     $self->{onerror}->(node => $attr,
4792 wakaba 1.104 type => 'coords:syntax error',
4793     level => $self->{level}->{must});
4794 wakaba 1.1 }
4795     },
4796 wakaba 1.70 nohref => $GetHTMLBooleanAttrChecker->('nohref'),
4797     target => $HTMLTargetAttrChecker,
4798 wakaba 1.1 href => $HTMLURIAttrChecker,
4799     ping => $HTMLSpaceURIsAttrChecker,
4800 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
4801 wakaba 1.1 media => $HTMLMQAttrChecker,
4802     hreflang => $HTMLLanguageTagAttrChecker,
4803     type => $HTMLIMTAttrChecker,
4804     }->{$attr_ln};
## Fallbacks when the table above has no entry: lowercase |data-*|
## attributes, then the generic HTML attribute checkers.
4805     if ($checker) {
4806     $attr{$attr_ln} = $attr;
4807 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
4808     $attr_ln !~ /[A-Z]/) {
4809 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
4810     $status = $HTMLDatasetAttrStatus;
4811 wakaba 1.1 } else {
4812     $checker = $HTMLAttrChecker->{$attr_ln};
4813     }
4814     }
## Last resort for any namespace: the global tables, first by local
## name and then the per-namespace default stored under the empty key.
4815     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
4816 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
4817     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
4818     || $AttrStatus->{$attr_ns}->{''};
4819     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
4820 wakaba 1.62 
## A non-reference true $checker means "known, nothing to check".  A
## null-namespace attribute with neither checker nor status is skipped
## silently; anything else without a checker is 'unknown attribute'.
4821 wakaba 1.1 if ($checker) {
4822 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
4823 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
4824 wakaba 1.54 #
4825 wakaba 1.1 } else {
4826 wakaba 1.104 $self->{onerror}->(node => $attr,
4827     type => 'unknown attribute',
4828     level => $self->{level}->{uncertain});
4829 wakaba 1.1 ## ISSUE: No conformance criteria for unknown attributes in the spec
4830     }
4831 wakaba 1.49 
4832 wakaba 1.82 $self->_attr_status_info ($attr, $status);
4833 wakaba 1.1 }
4834    
## Cross-check 1: with |href| the |alt| attribute is required; without
## |href| the hyperlink-related attributes and |alt| are not allowed.
4835     if (defined $attr{href}) {
4836 wakaba 1.4 $self->{has_hyperlink_element} = 1;
4837 wakaba 1.137 if (defined $attr{alt}) {
4838     my $url = $attr{href}->value; ## TODO: resolve
4839     if (length $attr{alt}->value) {
## Non-empty |alt|: mark this URL as satisfied (|0|) in every open
## |map|'s hash.
4840     for (@{$self->{flag}->{in_map} or []}) {
4841     $_->{$url} = 0;
4842     }
4843     } else {
4844     ## NOTE: Empty |alt=""|. If there is another |area| element
4845     ## with the same |href=""| and that |area| element's
4846     ## |alt=""| attribute is not an empty string, then this
4847     ## is conforming.
## Queue the |alt| node for the URL unless the URL is already marked
## as satisfied; |map|'s |check_end| reports whatever is still queued.
4848     for (@{$self->{flag}->{in_map} or []}) {
4849     push @{$_->{$url} ||= []}, $attr{alt}
4850     unless exists $_->{$url} and not $_->{$url};
4851     }
4852     }
4853     } else {
4854 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4855 wakaba 1.104 type => 'attribute missing',
4856     text => 'alt',
4857     level => $self->{level}->{must});
4858 wakaba 1.1 }
4859     } else {
4860     for (qw/target ping rel media hreflang type alt/) {
4861     if (defined $attr{$_}) {
4862     $self->{onerror}->(node => $attr{$_},
4863 wakaba 1.104 type => 'attribute not allowed',
4864     level => $self->{level}->{must});
4865 wakaba 1.1 }
4866     }
4867     }
4868    
## Cross-check 2: normalise |shape| to one of 'circle', 'default',
## 'polygon' or 'rectangle'; a missing or unrecognized value is treated
## as 'rectangle'.
4869     my $shape = 'rectangle';
4870     if (defined $attr{shape}) {
4871     $shape = {
4872     circ => 'circle', circle => 'circle',
4873     default => 'default',
4874     poly => 'polygon', polygon => 'polygon',
4875     rect => 'rectangle', rectangle => 'rectangle',
4876     }->{lc $attr{shape}->value} || 'rectangle';
4877     ## TODO: ASCII lowercase?
4878     }
4879    
## Cross-check 3: |coords| requirements per shape.
##   circle    - required; exactly 3 items, the third not negative.
##   default   - |coords| is not allowed.
##   polygon   - required; at least 6 items and an even number of them.
##   rectangle - required; exactly 4 items with item 0 < item 2 and
##               item 1 < item 3.
4880     if ($shape eq 'circle') {
4881     if (defined $attr{coords}) {
4882     if (defined $coords) {
4883     if (@$coords == 3) {
4884     if ($coords->[2] < 0) {
4885     $self->{onerror}->(node => $attr{coords},
4886 wakaba 1.104 type => 'coords:out of range',
4887     index => 2,
4888     value => $coords->[2],
4889     level => $self->{level}->{must});
4890 wakaba 1.1 }
4891     } else {
4892     $self->{onerror}->(node => $attr{coords},
4893 wakaba 1.104 type => 'coords:number not 3',
4894     text => 0+@$coords,
4895     level => $self->{level}->{must});
4896 wakaba 1.1 }
4897     } else {
4898     ## NOTE: A syntax error has been reported.
4899     }
4900     } else {
4901 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4902 wakaba 1.104 type => 'attribute missing',
4903     text => 'coords',
4904     level => $self->{level}->{must});
4905 wakaba 1.1 }
4906     } elsif ($shape eq 'default') {
4907     if (defined $attr{coords}) {
4908     $self->{onerror}->(node => $attr{coords},
4909 wakaba 1.104 type => 'attribute not allowed',
4910     level => $self->{level}->{must});
4911 wakaba 1.1 }
4912     } elsif ($shape eq 'polygon') {
4913     if (defined $attr{coords}) {
4914     if (defined $coords) {
4915     if (@$coords >= 6) {
4916     unless (@$coords % 2 == 0) {
4917     $self->{onerror}->(node => $attr{coords},
4918 wakaba 1.104 type => 'coords:number not even',
4919     text => 0+@$coords,
4920     level => $self->{level}->{must});
4921 wakaba 1.1 }
4922     } else {
4923     $self->{onerror}->(node => $attr{coords},
4924 wakaba 1.104 type => 'coords:number lt 6',
4925     text => 0+@$coords,
4926     level => $self->{level}->{must});
4927 wakaba 1.1 }
4928     } else {
4929     ## NOTE: A syntax error has been reported.
4930     }
4931     } else {
4932 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4933 wakaba 1.104 type => 'attribute missing',
4934     text => 'coords',
4935     level => $self->{level}->{must});
4936 wakaba 1.1 }
4937     } elsif ($shape eq 'rectangle') {
4938     if (defined $attr{coords}) {
4939     if (defined $coords) {
4940     if (@$coords == 4) {
4941     unless ($coords->[0] < $coords->[2]) {
4942     $self->{onerror}->(node => $attr{coords},
4943 wakaba 1.104 type => 'coords:out of range',
4944     index => 0,
4945     value => $coords->[0],
4946     level => $self->{level}->{must});
4947 wakaba 1.1 }
4948     unless ($coords->[1] < $coords->[3]) {
4949     $self->{onerror}->(node => $attr{coords},
4950 wakaba 1.104 type => 'coords:out of range',
4951     index => 1,
4952     value => $coords->[1],
4953     level => $self->{level}->{must});
4954 wakaba 1.1 }
4955     } else {
4956     $self->{onerror}->(node => $attr{coords},
4957 wakaba 1.104 type => 'coords:number not 4',
4958     text => 0+@$coords,
4959     level => $self->{level}->{must});
4960 wakaba 1.1 }
4961     } else {
4962     ## NOTE: A syntax error has been reported.
4963     }
4964     } else {
4965 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4966 wakaba 1.104 type => 'attribute missing',
4967     text => 'coords',
4968     level => $self->{level}->{must});
4969 wakaba 1.1 }
4970     }
4971 wakaba 1.66 
4972     $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
4973 wakaba 1.1 },
## check_start: an |area| that has a parent element is only allowed
## while the |{in_map}| flag is set; an |area| without a parent element
## is not reported.
4974 wakaba 1.59 check_start => sub {
4975     my ($self, $item, $element_state) = @_;
4976     unless ($self->{flag}->{in_map} or
4977     not $item->{node}->manakai_parent_element) {
4978     $self->{onerror}->(node => $item->{node},
4979     type => 'element not allowed:area',
4980 wakaba 1.104 level => $self->{level}->{must});
4981 wakaba 1.59 }
4982 wakaba 1.79 
4983     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4984     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4985 wakaba 1.59 },
4986 wakaba 1.1 };
4987    
## |table| element checker.  Spreads in %HTMLChecker (defined elsewhere
## in this file) and overrides the callbacks below.
##
## The order of child elements is enforced by a small state machine kept
## in |$element_state->{phase}|:
##   'before caption' - initial state; nothing seen yet.
##   'in colgroup'    - after |caption| or |colgroup|.
##   'after thead'    - after |thead|.
##   'in tbodys'      - after a |tbody| (or after an early |tfoot|).
##   'in trs'         - after a |tr| child.
##   'after tfoot'    - after a |tfoot| that followed the body; no more
##                      children are allowed.
## |$element_state->{has_tfoot}| records that a |tfoot| has been seen so
## that a second one is rejected.
4988     $Element->{$HTML_NS}->{table} = {
4989 wakaba 1.40 %HTMLChecker,
4990 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4991 wakaba 1.69 check_attrs => $GetHTMLAttrsChecker->({
4992 wakaba 1.86 cellpadding => $HTMLLengthAttrChecker,
4993     cellspacing => $HTMLLengthAttrChecker,
4994 wakaba 1.69 frame => $GetHTMLEnumeratedAttrChecker->({
4995     void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
4996     lhs => 1, rhs => 1, box => 1, border => 1,
4997     }),
4998     rules => $GetHTMLEnumeratedAttrChecker->({
4999     none => 1, groups => 1, rows => 1, cols => 1, all => 1,
5000     }),
5001     summary => sub {}, ## NOTE: %Text; in HTML4.
5002     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
5003     }, {
## Second argument: per-attribute feature status flags.
5004 wakaba 1.49 %HTMLAttrStatus,
5005 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
5006 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
5007     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
5008     border => FEATURE_M12N10_REC,
5009     cellpadding => FEATURE_M12N10_REC,
5010     cellspacing => FEATURE_M12N10_REC,
5011 wakaba 1.61 cols => FEATURE_RFC1942,
5012 wakaba 1.49 datafld => FEATURE_HTML4_REC_RESERVED,
5013     dataformatas => FEATURE_HTML4_REC_RESERVED,
5014     datapagesize => FEATURE_M12N10_REC,
5015     datasrc => FEATURE_HTML4_REC_RESERVED,
5016     frame => FEATURE_M12N10_REC,
5017 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5018 wakaba 1.49 rules => FEATURE_M12N10_REC,
5019     summary => FEATURE_M12N10_REC,
5020     width => FEATURE_M12N10_REC,
5021     }),
## check_start: reset the phase and record URL attribute kinds.
5022 wakaba 1.40 check_start => sub {
5023     my ($self, $item, $element_state) = @_;
5024     $element_state->{phase} = 'before caption';
5025 wakaba 1.66 
5026     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
5027 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
5028     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
5029 wakaba 1.40 },
## check_child_element: advance the phase for an allowed child, or
## report 'element not allowed' (must level) for anything else.  Minus
## and plus elements are handled first and do not change the phase.
## The phases are tested from the latest to the earliest.
##
## NOTE(review): a |tfoot| seen before any body content moves the phase
## to 'in tbodys', so |tr| children following such a |tfoot| are
## reported as not allowed while |tbody| children are accepted - confirm
## that this is intended.
5030     check_child_element => sub {
5031     my ($self, $item, $child_el, $child_nsuri, $child_ln,
5032     $child_is_transparent, $element_state) = @_;
5033 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
5034     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
5035 wakaba 1.40 $self->{onerror}->(node => $child_el,
5036     type => 'element not allowed:minus',
5037 wakaba 1.104 level => $self->{level}->{must});
5038 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
5039     #
5040     } elsif ($element_state->{phase} eq 'in tbodys') {
5041     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5042     #$element_state->{phase} = 'in tbodys';
5043     } elsif (not $element_state->{has_tfoot} and
5044     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5045     $element_state->{phase} = 'after tfoot';
5046     $element_state->{has_tfoot} = 1;
5047     } else {
5048 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5049     level => $self->{level}->{must});
5050 wakaba 1.40 }
5051     } elsif ($element_state->{phase} eq 'in trs') {
5052     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5053     #$element_state->{phase} = 'in trs';
5054     } elsif (not $element_state->{has_tfoot} and
5055     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5056     $element_state->{phase} = 'after tfoot';
5057     $element_state->{has_tfoot} = 1;
5058     } else {
5059 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5060     level => $self->{level}->{must});
5061 wakaba 1.40 }
5062     } elsif ($element_state->{phase} eq 'after thead') {
5063     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5064     $element_state->{phase} = 'in tbodys';
5065     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5066     $element_state->{phase} = 'in trs';
5067     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5068     $element_state->{phase} = 'in tbodys';
5069     $element_state->{has_tfoot} = 1;
5070     } else {
5071 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5072     level => $self->{level}->{must});
5073 wakaba 1.40 }
5074     } elsif ($element_state->{phase} eq 'in colgroup') {
5075     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
5076     $element_state->{phase} = 'in colgroup';
5077     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
5078     $element_state->{phase} = 'after thead';
5079     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5080     $element_state->{phase} = 'in tbodys';
5081     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5082     $element_state->{phase} = 'in trs';
5083     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5084     $element_state->{phase} = 'in tbodys';
5085     $element_state->{has_tfoot} = 1;
5086     } else {
5087 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5088     level => $self->{level}->{must});
5089 wakaba 1.40 }
5090     } elsif ($element_state->{phase} eq 'before caption') {
5091     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
5092     $element_state->{phase} = 'in colgroup';
5093     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
5094     $element_state->{phase} = 'in colgroup';
5095     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
5096     $element_state->{phase} = 'after thead';
5097     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5098     $element_state->{phase} = 'in tbodys';
5099     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5100     $element_state->{phase} = 'in trs';
5101     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5102     $element_state->{phase} = 'in tbodys';
5103     $element_state->{has_tfoot} = 1;
5104     } else {
5105 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5106     level => $self->{level}->{must});
5107 wakaba 1.40 }
5108     } elsif ($element_state->{phase} eq 'after tfoot') {
5109 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5110     level => $self->{level}->{must});
5111 wakaba 1.40 } else {
## Unknown phase name: an internal error, not a document error.
5112     die "check_child_element: Bad |table| phase: $element_state->{phase}";
5113     }
5114     },
## check_child_text: significant text directly inside |table| is an
## error.
5115     check_child_text => sub {
5116     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
5117     if ($has_significant) {
5118 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
5119     level => $self->{level}->{must});
5120 wakaba 1.1 }
5121 wakaba 1.40 },
## check_end: build the table structure with Whatpm::HTMLTable (loaded
## on demand), forwarding the errors it reports to |onerror|, append the
## result to |$self->{return}->{table}|, then run the generic
## |check_end|.
5122     check_end => sub {
5123     my ($self, $item, $element_state) = @_;
5124 wakaba 1.1 
5125     ## Table model errors
5126     require Whatpm::HTMLTable;
5127 wakaba 1.87 my $table = Whatpm::HTMLTable->form_table ($item->{node}, sub {
5128 wakaba 1.104 $self->{onerror}->(@_);
5129     }, $self->{level});
5130 wakaba 1.87 Whatpm::HTMLTable->assign_header
5131 wakaba 1.104 ($table, $self->{onerror}, $self->{level});
5132 wakaba 1.87 push @{$self->{return}->{table}}, $table;
5133 wakaba 1.1 
5134 wakaba 1.40 $HTMLChecker{check_end}->(@_);
5135 wakaba 1.1 },
5136     };
5137    
## |caption| element checker.  Spreads in %HTMLFlowContentChecker
## (defined elsewhere in this file) and wraps its |check_start| and
## |check_end| so that the HTML |table| element is registered through
## |_add_minus_elements| while the caption is being checked and
## unregistered again at the end.
## NOTE(review): |_add_minus_elements| / |_remove_minus_elements| are
## defined outside this chunk; presumably they add to and restore
## |$self->{minus_elements}| - confirm.
5138     $Element->{$HTML_NS}->{caption} = {
5139 wakaba 1.169 %HTMLFlowContentChecker,
5140 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
5141 wakaba 1.69 check_attrs => $GetHTMLAttrsChecker->({
5142     align => $GetHTMLEnumeratedAttrChecker->({
5143     top => 1, bottom => 1, left => 1, right => 1,
5144     }),
5145     }, {
## Second argument: per-attribute feature status flags.
5146 wakaba 1.49 %HTMLAttrStatus,
5147 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
5148 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
5149 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5150 wakaba 1.49 }),
5151 wakaba 1.169 check_start => sub {
5152     my ($self, $item, $element_state) = @_;
5153     $self->_add_minus_elements ($element_state, {$HTML_NS => {table => 1}});
5154    
5155     $HTMLFlowContentChecker{check_start}->(@_);
5156     },
5157     check_end => sub {
5158     my ($self, $item, $element_state) = @_;
5159     $self->_remove_minus_elements ($element_state);
5160    
5161     $HTMLFlowContentChecker{check_end}->(@_);
5162     },
5163     }; # caption
5164 wakaba 1.1
## Attribute checkers for the HTML4 cell alignment attributes (|align|,
## |char|, |charoff|, |valign|), kept in one hash so that element
## definitions can spread them into their own checker tables (as
## |colgroup| does).
5165 wakaba 1.69 my %cellalign = (
5166     ## HTML4 %cellhalign;
5167 wakaba 1.70 align => $GetHTMLEnumeratedAttrChecker->({
5168     left => 1, center => 1, right => 1, justify => 1, char => 1,
5169     }),
## |char|: the value must be exactly one character long; anything else
## is reported as 'char:syntax error' at the html4_fact level.
5170     char => sub {
5171     my ($self, $attr) = @_;
5172 wakaba 1.69 
5173 wakaba 1.70 ## NOTE: "character" or |%Character;| in HTML4.
5174    
5175     my $value = $attr->value;
5176     if (length $value != 1) {
5177     $self->{onerror}->(node => $attr, type => 'char:syntax error',
5178 wakaba 1.105 level => $self->{level}->{html4_fact});
5179 wakaba 1.70 }
5180     },
5181 wakaba 1.86 charoff => $HTMLLengthAttrChecker,
5182    
5183 wakaba 1.69 ## HTML4 %cellvalign;
5184 wakaba 1.70 valign => $GetHTMLEnumeratedAttrChecker->({
5185     top => 1, middle => 1, bottom => 1, baseline => 1,
5186     }),
5187 wakaba 1.69 );
5188    
## |colgroup| element checker.  Spreads in %HTMLEmptyChecker (defined
## elsewhere in this file) but overrides |check_child_element| and
## |check_child_text|: HTML |col| children are accepted, any other child
## element or significant text is an error.
5189 wakaba 1.1 $Element->{$HTML_NS}->{colgroup} = {
5190 wakaba 1.40 %HTMLEmptyChecker,
5191 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
5192 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
5193 wakaba 1.69 %cellalign,
## The callback passed for |span| accepts only values greater than zero.
5194 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
5195     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
5196     ## TODO: "attribute not supported" if |col|.
5197     ## ISSUE: MUST NOT if any |col|?
5198     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
5199 wakaba 1.49 }, {
## Second argument: per-attribute feature status flags.
5200     %HTMLAttrStatus,
5201 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
5202 wakaba 1.49 align => FEATURE_M12N10_REC,
5203     char => FEATURE_M12N10_REC,
5204     charoff => FEATURE_M12N10_REC,
5205 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5206     span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
5207 wakaba 1.49 valign => FEATURE_M12N10_REC,
5208     width => FEATURE_M12N10_REC,
5209 wakaba 1.1 }),
## check_child_element: minus elements are rejected, plus elements are
## ignored, |col| is accepted, everything else is 'element not allowed'.
5210 wakaba 1.40 check_child_element => sub {
5211     my ($self, $item, $child_el, $child_nsuri, $child_ln,
5212     $child_is_transparent, $element_state) = @_;
5213 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
5214     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
5215 wakaba 1.40 $self->{onerror}->(node => $child_el,
5216     type => 'element not allowed:minus',
5217 wakaba 1.104 level => $self->{level}->{must});
5218 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
5219     #
5220     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
5221     #
5222     } else {
5223 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5224     level => $self->{level}->{must});
5225 wakaba 1.40 }
5226     },
## check_child_text: significant text is reported as 'character not
## allowed'.
5227     check_child_text => sub {
5228     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
5229     if ($has_significant) {
5230 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
5231     level => $self->{level}->{must});
5232 wakaba 1.1 }
5233     },
5234     };
5235    
## |col| element: based on the empty-content checker, with the cell
## alignment attributes plus |span|.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    ## The callback only accepts values greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    ## Explicit entries come after the shared tables so that they win.
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign width)),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
};
5254    
## |tbody| element.  Apart from elements registered in |plus_elements|,
## only HTML |tr| children are accepted; significant text is an error.
## The |thead| and |tfoot| definitions copy this one.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    ## Explicit entries come after the shared tables so that they win.
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide first which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} &&
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
                  ($child_nsuri eq $HTML_NS and $child_ln eq 'tr'))) {
      $error_type = 'element not allowed';
    }

    $self->{onerror}->(node => $child_el, type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
5294    
## |thead| and |tfoot| each get their own shallow copy of the |tbody|
## definition.
$Element->{$HTML_NS}->{thead} = { %{ $Element->{$HTML_NS}->{tbody} } };

$Element->{$HTML_NS}->{tfoot} = { %{ $Element->{$HTML_NS}->{tbody} } };
5302    
## |tr| element.  Apart from elements registered in |plus_elements|,
## only HTML |td| and |th| children are accepted; significant text is an
## error.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    bgcolor => $HTMLColorAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    ## Explicit entries come after the shared tables so that they win.
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide first which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} &&
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
                  ($child_nsuri eq $HTML_NS and
                   ($child_ln eq 'td' or $child_ln eq 'th')))) {
      $error_type = 'element not allowed';
    }

    $self->{onerror}->(node => $child_el, type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
};
5345    
## |td| element: flow content, with the cell alignment attributes and
## the cell-specific attributes listed below.
$Element->{$HTML_NS}->{td} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr => sub { }, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub { }, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    ## The callback only accepts values greater than zero.
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    headers => sub {
      ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
      ## Though that method does not check the |headers| attribute of a
      ## |td| element if the element does not form a table, in that case
      ## the |td| element is non-conforming anyway.
    },
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    ## The callback only accepts values greater than zero.
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(row col rowgroup colgroup)
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    ## Explicit entries, grouped by status value; they come after the
    ## shared tables so that they win.
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw(bgcolor height nowrap width)),
    (map { ($_ => FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw(abbr axis scope)),
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw(colspan rowspan)),
    headers => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
};
5385    
## |th| element: phrasing content, with the cell alignment attributes and
## the cell-specific attributes listed below.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr => sub { }, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub { }, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    ## The callback only accepts values greater than zero.
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    ## TODO: HTML4(?) |headers|
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    ## The callback only accepts values greater than zero.
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(row col rowgroup colgroup)
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    ## Explicit entries, grouped by status value; they come after the
    ## shared tables so that they win.
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw(bgcolor height nowrap width)),
    (map { ($_ => FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw(abbr axis headers)),
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw(colspan rowspan scope)),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
};
5420    
## |form| element: flow content.  |form| itself is registered through
## |_add_minus_elements| for the duration of the element.
$Element->{$HTML_NS}->{form} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $AcceptAttrChecker,
    'accept-charset' => $HTMLCharsetsAttrChecker,
    action => $HTMLURIAttrChecker, ## TODO: Warn if submission is not defined for the scheme
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    enctype => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } (
        'application/x-www-form-urlencoded',
        'multipart/form-data',
        'text/plain',
      )
    }),
    method => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(get post put delete)
    }),
    name => sub {
      my ($self, $attr) = @_;

      ## A form name has to be non-empty and must not repeat a name
      ## already recorded in |$self->{form}|.
      my $form_name = $attr->value;
      if ($form_name eq '') {
        $self->{onerror}->(type => 'empty form name',
                           node => $attr,
                           level => $self->{level}->{must});
      } elsif ($self->{form}->{$form_name}) {
        $self->{onerror}->(type => 'duplicate form name',
                           node => $attr,
                           value => $form_name,
                           level => $self->{level}->{must});
      } else {
        $self->{form}->{$form_name} = 1;
      }
    },
    novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
    ## TODO: Tests for following attrs:
    (map { ($_ => $HTMLEventHandlerAttrChecker) }
         qw(onformchange onforminput onreceived)),
    replace => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(document values)
    }),
    target => $HTMLTargetAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Explicit entries, grouped by status value; they come after the
    ## shared tables so that they win.
    accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC) }
         qw(action enctype method)),
    #name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw(accept-charset name onreset onsubmit target)),
    (map { ($_ => FEATURE_WF2) } qw(data onreceived replace)),
    (map { ($_ => FEATURE_WF2_INFORMATIVE) } qw(onformchange onforminput)),
    (map { ($_ => FEATURE_HTML20_RFC) } qw(sdapref sdasuff)),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    novalidate => FEATURE_HTML5_DEFAULT,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {form => 1}});

    ## Record how the URI-valued attributes are used.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{action}->{type}->{action} = 1;
    $uri_info->{$_}->{type}->{resource} = 1 for qw(data template ref);
    $element_state->{id_type} = 'form';
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the registration made in |check_start|, then run the
    ## ordinary flow content end check with the same arguments.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
5503    
## |fieldset| element: a |legend| child followed by flow content.  A
## |legend| is required, and one that appears after other content is
## reported.
$Element->{$HTML_NS}->{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Explicit entries come after the shared tables so that they win.
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_WF2X) } qw(disabled form)),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    name => FEATURE_HTML5_DEFAULT,
  }),
  ## NOTE: legend, Flow
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $must = $self->{level}->{must};

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} &&
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $must);
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; nothing to record.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## A |legend| seen after any other content (including an earlier
      ## |legend|) is reported.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:details legend',
                         level => $must)
          if $element_state->{has_non_legend};
      $element_state->{has_legend} = 1;
      $element_state->{has_non_legend} = 1;
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
      ## TODO:
      ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
      ## should be an error, since |object| is allowed as flow,
      ## therefore |details| part of the content model does not match.
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text counts as content other than |legend|.
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'legend',
                       level => $self->{level}->{must})
        unless $element_state->{has_legend};

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
  ## NOTE: This definition is partially reused by |details| element's
  ## checker.
};
5570    
5571     $Element->{$HTML_NS}->{input} = {
5572 wakaba 1.119 %HTMLEmptyChecker,
5573     status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5574 wakaba 1.140 check_attrs => sub {
5575     my ($self, $item, $element_state) = @_;
5576 wakaba 1.142
5577 wakaba 1.145 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5578 wakaba 1.142 $state = 'text' unless defined $state;
5579     $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5580    
5581 wakaba 1.140 for my $attr (@{$item->{node}->attributes}) {
5582     my $attr_ns = $attr->namespace_uri;
5583     $attr_ns = '' unless defined $attr_ns;
5584     my $attr_ln = $attr->manakai_local_name;
5585     my $checker;
5586     my $status;
5587     if ($attr_ns eq '') {
5588     $status =
5589     {
5590     %HTMLAttrStatus,
5591     %HTMLM12NCommonAttrStatus,
5592     accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5593     'accept-charset' => FEATURE_HTML2X_RFC,
5594     accesskey => FEATURE_M12N10_REC,
5595     action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5596     align => FEATURE_M12N10_REC_DEPRECATED,
5597     alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5598     autocomplete => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5599     autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5600     checked => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5601     datafld => FEATURE_HTML4_REC_RESERVED,
5602     dataformatas => FEATURE_HTML4_REC_RESERVED,
5603     datasrc => FEATURE_HTML4_REC_RESERVED,
5604     disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5605     enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5606     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5607 wakaba 1.150 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5608     FEATURE_XHTMLBASIC11_CR,
5609 wakaba 1.140 ismap => FEATURE_M12N10_REC,
5610 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5611 wakaba 1.140 list => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5612     max => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5613 wakaba 1.150 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X |
5614     FEATURE_M12N10_REC,
5615 wakaba 1.140 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5616     min => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5617 wakaba 1.156 multiple => FEATURE_HTML5_DEFAULT,
5618 wakaba 1.140 name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5619 wakaba 1.161 novalidate => FEATURE_HTML5_DEFAULT,
5620 wakaba 1.140 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5621     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5622     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5623     onformchange => FEATURE_WF2_INFORMATIVE,
5624     onforminput => FEATURE_WF2_INFORMATIVE,
5625     oninput => FEATURE_WF2,
5626     oninvalid => FEATURE_WF2,
5627     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5628     pattern => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5629 wakaba 1.156 placeholder => FEATURE_HTML5_DEFAULT,
5630 wakaba 1.140 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5631     replace => FEATURE_WF2,
5632     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5633     sdapref => FEATURE_HTML20_RFC,
5634 wakaba 1.154 size => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5635 wakaba 1.140 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5636     step => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5637     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5638     target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5639 wakaba 1.161 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO:dropped
5640 wakaba 1.140 type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5641     usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
5642     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5643     }->{$attr_ln};
5644    
5645     $checker =
5646     {
5647 wakaba 1.141 ## NOTE: Value of an empty string means that the attribute is only
5648     ## applicable for a specific set of states.
5649 wakaba 1.142 accept => '',
5650 wakaba 1.149 'accept-charset' => $HTMLCharsetsAttrChecker,
5651     ## NOTE: To which states it applies is not defined in RFC 2070.
5652 wakaba 1.150 accesskey => '', ## NOTE: Not applied to |hidden| [WF2].
5653 wakaba 1.142 action => '',
5654 wakaba 1.150 align => '',
5655 wakaba 1.141 alt => '',
5656 wakaba 1.142 autocomplete => '',
5657 wakaba 1.165 autofocus => $AutofocusAttrChecker,
5658     ## NOTE: <input type=hidden disabled> is not disallowed.
5659 wakaba 1.142 checked => '',
5660     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
5661 wakaba 1.165 ## NOTE: <input type=hidden disabled> is not disallowed.
5662 wakaba 1.142 enctype => '',
5663     form => $HTMLFormAttrChecker,
5664 wakaba 1.150 inputmode => '',
5665     ismap => '', ## NOTE: "MUST" be type=image [HTML4]
5666 wakaba 1.142 list => '',
5667     max => '',
5668     maxlength => '',
5669     method => '',
5670     min => '',
5671 wakaba 1.156 multiple => '',
5672 wakaba 1.165 name => $FormControlNameAttrChecker,
5673 wakaba 1.166 novalidate => '',
5674 wakaba 1.149 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
5675     onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
5676     oninput => $HTMLEventHandlerAttrChecker, # [WF2]
5677     oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
5678     ## TODO: tests for four attributes above
5679 wakaba 1.142 pattern => '',
5680 wakaba 1.156 placeholder => '',
5681 wakaba 1.142 readonly => '',
5682 wakaba 1.150 replace => '',
5683 wakaba 1.142 required => '',
5684     size => '',
5685     src => '',
5686     step => '',
5687     target => '',
5688 wakaba 1.140 type => $GetHTMLEnumeratedAttrChecker->({
5689 wakaba 1.156 hidden => 1, text => 1, search => 1, url => 1,
5690     email => 1, password => 1,
5691 wakaba 1.141 datetime => 1, date => 1, month => 1, week => 1, time => 1,
5692 wakaba 1.157 'datetime-local' => 1, number => 1, range => 1, color => 1,
5693     checkbox => 1,
5694 wakaba 1.141 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
5695     button => 1,
5696 wakaba 1.140 }),
5697 wakaba 1.151 usemap => '',
5698 wakaba 1.142 value => '',
5699 wakaba 1.140 }->{$attr_ln};
5700 wakaba 1.141
5701     ## State-dependent checkers
5702     unless ($checker) {
5703     if ($state eq 'hidden') {
5704     $checker =
5705     {
5706 wakaba 1.142 value => sub {
5707     my ($self, $attr, $item, $element_state) = @_;
5708 wakaba 1.145 my $name = $item->{node}->get_attribute_ns (undef, 'name');
5709 wakaba 1.142 if (defined $name and $name eq '_charset_') { ## case-sensitive
5710     $self->{onerror}->(node => $attr,
5711     type => '_charset_ value',
5712     level => $self->{level}->{must});
5713     }
5714     },
5715 wakaba 1.141 }->{$attr_ln} || $checker;
5716 wakaba 1.142 ## TODO: Warn if no name attribute?
5717     ## TODO: Warn if name!=_charset_ and no value attribute?
5718 wakaba 1.168 } elsif ({
5719     datetime => 1, date => 1, month => 1, time => 1,
5720     week => 1, 'datetime-local' => 1,
5721     }->{$state}) {
5722     my $v = {
5723     datetime => ['global_date_and_time_string'],
5724     date => ['date_string'],
5725     month => ['month_string'],
5726     week => ['week_string'],
5727     time => ['time_string'],
5728     'datetime-local' => ['local_date_and_time_string'],
5729     }->{$state};
5730 wakaba 1.144 $checker =
5731     {
5732 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5733 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5734     on => 1, off => 1,
5735     }),
5736 wakaba 1.158 list => $ListAttrChecker,
5737 wakaba 1.168 min => $GetDateTimeAttrChecker->($v->[0]),
5738     max => $GetDateTimeAttrChecker->($v->[0]),
5739 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5740 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5741 wakaba 1.148 step => $StepAttrChecker,
5742 wakaba 1.168 value => $GetDateTimeAttrChecker->($v->[0]),
5743 wakaba 1.144 }->{$attr_ln} || $checker;
5744     } elsif ($state eq 'number') {
5745     $checker =
5746     {
5747 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5748 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5749     on => 1, off => 1,
5750     }),
5751 wakaba 1.158 list => $ListAttrChecker,
5752 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5753     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5754 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5755 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5756 wakaba 1.148 step => $StepAttrChecker,
5757 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5758 wakaba 1.144 }->{$attr_ln} || $checker;
5759     } elsif ($state eq 'range') {
5760     $checker =
5761     {
5762 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5763 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5764     on => 1, off => 1,
5765     }),
5766 wakaba 1.158 list => $ListAttrChecker,
5767 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5768     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5769 wakaba 1.148 step => $StepAttrChecker,
5770 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5771 wakaba 1.144 }->{$attr_ln} || $checker;
5772 wakaba 1.157 } elsif ($state eq 'color') {
5773     $checker =
5774     {
5775     accesskey => $HTMLAccesskeyAttrChecker,
5776     autocomplete => $GetHTMLEnumeratedAttrChecker->({
5777     on => 1, off => 1,
5778     }),
5779 wakaba 1.158 list => $ListAttrChecker,
5780 wakaba 1.157 value => sub {
5781     my ($self, $attr) = @_;
5782     unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
5783     $self->{onerror}->(node => $attr,
5784     type => 'scolor:syntax error', ## TODOC: type
5785     level => $self->{level}->{must});
5786     }
5787     },
5788     }->{$attr_ln} || $checker;
5789 wakaba 1.144 } elsif ($state eq 'checkbox' or $state eq 'radio') {
5790     $checker =
5791     {
5792 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5793 wakaba 1.149 checked => $GetHTMLBooleanAttrChecker->('checked'),
5794     ## ISSUE: checked value not (yet?) defined.
5795     ## TODO: tests
5796 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5797 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5798     }->{$attr_ln} || $checker;
5799     ## TODO: There MUST be another input type=radio with same
5800     ## name (Radio state).
5801     ## ISSUE: There should be exactly one type=radio with checked?
5802     } elsif ($state eq 'file') {
5803     $checker =
5804     {
5805 wakaba 1.161 accept => $AcceptAttrChecker,
5806 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5807 wakaba 1.168 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
5808 wakaba 1.159 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
5809 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5810 wakaba 1.144 }->{$attr_ln} || $checker;
5811     } elsif ($state eq 'submit') {
5812     $checker =
5813     {
5814 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5815 wakaba 1.149 action => $HTMLURIAttrChecker,
5816 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5817     'application/x-www-form-urlencoded' => 1,
5818     'multipart/form-data' => 1,
5819     'text/plain' => 1,
5820     }),
5821 wakaba 1.149 method => $GetHTMLEnumeratedAttrChecker->({
5822     get => 1, post => 1, put => 1, delete => 1,
5823     }),
5824 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5825 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5826     document => 1, values => 1,
5827     }),
5828     target => $HTMLTargetAttrChecker,
5829 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5830     }->{$attr_ln} || $checker;
5831     } elsif ($state eq 'image') {
5832     $checker =
5833     {
5834 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5835 wakaba 1.149 action => $HTMLURIAttrChecker,
5836     align => $GetHTMLEnumeratedAttrChecker->({
5837     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
5838     }),
5839 wakaba 1.144 alt => sub {
5840     my ($self, $attr) = @_;
5841     my $value = $attr->value;
5842     unless (length $value) {
5843     $self->{onerror}->(node => $attr,
5844     type => 'empty anchor image alt',
5845     level => $self->{level}->{must});
5846     }
5847     },
5848 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5849     'application/x-www-form-urlencoded' => 1,
5850     'multipart/form-data' => 1,
5851     'text/plain' => 1,
5852     }),
5853 wakaba 1.149 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
5854     method => $GetHTMLEnumeratedAttrChecker->({
5855     get => 1, post => 1, put => 1, delete => 1,
5856     }),
5857 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5858 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5859     document => 1, values => 1,
5860     }),
5861 wakaba 1.144 src => $HTMLURIAttrChecker,
5862     ## TODO: There is requirements on the referenced resource.
5863 wakaba 1.149 target => $HTMLTargetAttrChecker,
5864     usemap => $HTMLUsemapAttrChecker,
5865 wakaba 1.144 }->{$attr_ln} || $checker;
5866     ## TODO: alt & src are required.
5867     } elsif ({
5868     reset => 1, button => 1,
5869     ## NOTE: From Web Forms 2.0:
5870     remove => 1, 'move-up' => 1, 'move-down' => 1,
5871     add => 1,
5872     }->{$state}) {
5873     $checker =
5874     {
5875 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5876 wakaba 1.144 ## NOTE: According to Web Forms 2.0, |input| attribute
5877     ## has |template| attribute to support the |add| button
5878     ## type (as part of the repetition template feature). It
5879     ## conflicts with the |template| global attribute
5880     ## introduced as part of the data template feature.
5881     ## NOTE: |template| attribute as defined in Web Forms 2.0
5882     ## has no author requirement.
5883     value => sub { }, ## NOTE: No restriction.
5884     }->{$attr_ln} || $checker;
5885 wakaba 1.156 } else { # Text, Search, E-mail, URL, Password
5886 wakaba 1.141 $checker =
5887     {
5888 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5889 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5890     on => 1, off => 1,
5891     }),
5892 wakaba 1.149 ## TODO: inputmode [WF2]
5893 wakaba 1.158 list => $ListAttrChecker,
5894 wakaba 1.147 maxlength => sub {
5895     my ($self, $attr, $item, $element_state) = @_;
5896    
5897     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
5898    
5899 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
5900 wakaba 1.147 ## NOTE: Applying the rules for parsing non-negative
5901     ## integers results in a number.
5902     my $max_allowed_value_length = 0+$1;
5903    
5904     my $value = $item->{node}->get_attribute_ns (undef, 'value');
5905     if (defined $value) {
5906     my $codepoint_length = length $value;
5907 wakaba 1.162
5908 wakaba 1.147 if ($codepoint_length > $max_allowed_value_length) {
5909     $self->{onerror}
5910     ->(node => $item->{node}
5911     ->get_attribute_node_ns (undef, 'value'),
5912     type => 'value too long',
5913     level => $self->{level}->{must});
5914     }
5915     }
5916     }
5917     },
5918 wakaba 1.160 pattern => $PatternAttrChecker,
5919 wakaba 1.159 placeholder => sub {
5920     my ($self, $attr) = @_;
5921     if ($attr->value =~ /[\x0D\x0A]/) {
5922     $self->{onerror}->(node => $attr,
5923     type => 'newline in value', ## TODOC: type
5924     level => $self->{level}->{must});
5925     }
5926     },
5927 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5928 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5929 wakaba 1.147 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
5930 wakaba 1.143 value => sub { ## |value| checker; behaviour depends on the closed-over $state
5931 wakaba 1.156 my ($self, $attr, $item, $element_state) = @_;
5932     if ($state eq 'url') {
5933     $HTMLURIAttrChecker->(@_); ## delegate with the same arguments
5934     } elsif ($state eq 'email') {
5935     if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
5936     my @addr = split /,/, $attr->value, -1; ## limit -1 keeps trailing empty tokens
5937     @addr = ('') unless @addr; ## an empty value is checked as one empty token
5938     for (@addr) {
5939 wakaba 1.165 s/\A[\x09\x0A\x0C\x0D\x20]+//; ## strip all leading space characters
5940     s/[\x09\x0A\x0C\x0D\x20]+\z//; ## strip all trailing space characters, not just one
5941 wakaba 1.156 unless (/\A$ValidEmailAddress\z/) {
5942     $self->{onerror}->(node => $attr,
5943     type => 'email:syntax error', ## TODO: type
5944     value => $_,
5945     level => $self->{level}->{must});
5946     }
5947     }
5948     } else {
5949     unless ($attr->value =~ /\A$ValidEmailAddress\z/) { ## single address, no stripping
5950     $self->{onerror}->(node => $attr,
5951     type => 'email:syntax error', ## TODO: type
5952     level => $self->{level}->{must});
5953     }
5954     }
5955     } else {
5956     if ($attr->value =~ /[\x0D\x0A]/) { ## remaining states: CR and LF are reported
5957     $self->{onerror}->(node => $attr,
5958     type => 'newline in value', ## TODO: type
5959     level => $self->{level}->{must});
5960     }
5961     }
5962 wakaba 1.143 },
5963 wakaba 1.141 }->{$attr_ln} || $checker;
5964 wakaba 1.147 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
5965 wakaba 1.156 $checker = $GetHTMLBooleanAttrChecker->('multiple')
5966     if $state eq 'email' and $attr_ln eq 'multiple';
5967 wakaba 1.161
5968     if ($item->{node}->has_attribute_ns (undef, 'pattern') and
5969     not $item->{node}->has_attribute_ns (undef, 'title')) {
5970     $self->{onerror}->(node => $item->{node},
5971     type => 'attribute missing',
5972     text => 'title',
5973     level => $self->{level}->{should});
5974     }
5975 wakaba 1.141 }
5976     }
5977    
5978     if (defined $checker) {
5979     if ($checker eq '') {
5980     $checker = sub {
5981     my ($self, $attr) = @_;
5982     $self->{onerror}->(node => $attr,
5983     type => 'input attr not applicable',
5984     text => $state,
5985     level => $self->{level}->{must});
5986     };
5987     }
5988 wakaba 1.140 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
5989     $attr_ln !~ /[A-Z]/) {
5990     $checker = $HTMLDatasetAttrChecker;
5991     $status = $HTMLDatasetAttrStatus;
5992     } else {
5993     $checker = $HTMLAttrChecker->{$attr_ln};
5994     }
5995     }
5996     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
5997     || $AttrChecker->{$attr_ns}->{''};
5998     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
5999     || $AttrStatus->{$attr_ns}->{''};
6000     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
6001 wakaba 1.157
6002     ## TODOC: accesskey="" is also applied to type=search and type=color
6003 wakaba 1.140
6004     if ($checker) {
6005     $checker->($self, $attr, $item, $element_state) if ref $checker; ## a true non-reference value is accepted without a call
6006     } elsif ($attr_ns eq '' and not $status) { ## null-namespace attribute with no status: nothing is reported in this branch
6007     #
6008     } else {
6009     $self->{onerror}->(node => $attr,
6010     type => 'unknown attribute',
6011     level => $self->{level}->{uncertain});
6012     ## ISSUE: No conformance criteria for unknown attributes in the spec
6013     }
6014    
6015     $self->_attr_status_info ($attr, $status);
6016     }
6017 wakaba 1.168
6018     ## ISSUE: -0/+0
6019    
6020     if ($state eq 'range') {
6021     $element_state->{number_value}->{min} ||= 0;
6022     $element_state->{number_value}->{max} = 100
6023     unless defined $element_state->{number_value}->{max};
6024     }
6025    
6026     if (defined $element_state->{date_value}->{min} or
6027     defined $element_state->{date_value}->{max}) {
6028     my $min_value = $element_state->{date_value}->{min};
6029     my $max_value = $element_state->{date_value}->{max};
6030     my $value_value = $element_state->{date_value}->{value};
6031    
6032     if (defined $min_value and $min_value eq '' and
6033     (defined $max_value or defined $value_value)) {
6034     my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6035     $self->{onerror}->(node => $min,
6036     type => 'date value not supported', ## TODOC: type
6037     value => $min->value,
6038     level => $self->{level}->{unsupported});
6039     undef $min_value;
6040     }
6041     if (defined $max_value and $max_value eq '' and
6042     (defined $element_state->{date_value}->{min} or defined $value_value)) { ## report only if a |min| or |value| exists to compare with
6043     my $max = $item->{node}->get_attribute_node_ns (undef, 'max'); ## the stored |min| is tested because $min_value may already be cleared
6044     $self->{onerror}->(node => $max,
6045     type => 'date value not supported', ## TODOC: type
6046     value => $max->value,
6047     level => $self->{level}->{unsupported});
6048     undef $max_value; ## exclude the unsupported |max| from the comparisons that follow
6049     }
6050     if (defined $value_value and $value_value eq '' and
6051     (defined $max_value or defined $min_value)) {
6052     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6053     $self->{onerror}->(node => $value,
6054     type => 'date value not supported', ## TODOC: type
6055     value => $value->value,
6056     level => $self->{level}->{unsupported});
6057     undef $value_value;
6058     }
6059    
6060     if (defined $min_value and defined $max_value) {
6061     if ($min_value->to_html5_number > $max_value->to_html5_number) {
6062     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6063     $self->{onerror}->(node => $max,
6064     type => 'max lt min', ## TODOC: type
6065     level => $self->{level}->{must});
6066     }
6067     }
6068    
6069     if (defined $min_value and defined $value_value) {
6070     if ($min_value->to_html5_number > $value_value->to_html5_number) {
6071     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6072     $self->{onerror}->(node => $value,
6073     type => 'value lt min', ## TODOC: type
6074     level => $self->{level}->{warn});
6075     ## NOTE: Not an error.
6076     }
6077     }
6078    
6079     if (defined $max_value and defined $value_value) {
6080     if ($max_value->to_html5_number < $value_value->to_html5_number) {
6081     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6082     $self->{onerror}->(node => $value,
6083     type => 'value gt max', ## TODOC: type
6084     level => $self->{level}->{warn});
6085     ## NOTE: Not an error.
6086     }
6087     }
6088     } elsif (defined $element_state->{number_value}->{min} or
6089     defined $element_state->{number_value}->{max}) {
6090     my $min_value = $element_state->{number_value}->{min};
6091     my $max_value = $element_state->{number_value}->{max};
6092     my $value_value = $element_state->{number_value}->{value};
6093    
6094     if (defined $min_value and defined $max_value) {
6095     if ($min_value > $max_value) {
6096     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6097     $self->{onerror}->(node => $max,
6098     type => 'max lt min', ## TODOC: type
6099     level => $self->{level}->{must});
6100     }
6101     }
6102    
6103     if (defined $min_value and defined $value_value) {
6104     if ($min_value > $value_value) {
6105     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6106     $self->{onerror}->(node => $value,
6107     type => 'value lt min', ## TODOC: type
6108     level => $self->{level}->{warn});
6109     ## NOTE: Not an error.
6110     }
6111     }
6112    
6113     if (defined $max_value and defined $value_value) {
6114     if ($max_value < $value_value) {
6115     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6116     $self->{onerror}->(node => $value,
6117     type => 'value gt max', ## TODOC: type
6118     level => $self->{level}->{warn});
6119     ## NOTE: Not an error.
6120     }
6121     }
6122     }
6123 wakaba 1.150
6124 wakaba 1.168 ## TODO: Warn unless value = min * x where x is an integer.
6125    
6126 wakaba 1.150 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6127     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6128     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6129     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6130     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6131 wakaba 1.140 },
6132 wakaba 1.66 check_start => sub {
6133     my ($self, $item, $element_state) = @_;
6134 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6135     $self->{onerror}->(node => $item->{node},
6136     type => 'multiple labelable fae',
6137     level => $self->{level}->{must});
6138     } else {
6139     $self->{flag}->{has_labelable} = 2;
6140     }
6141 wakaba 1.138
6142     $element_state->{id_type} = 'labelable';
6143 wakaba 1.66 },
6144 wakaba 1.52 };
6145    
6146 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
6147    
6148 wakaba 1.80 ## NOTE: "authors who are nesting repetition blocks should position such
6149     ## [repetition-block-related] buttons carefully to make clear which block a
6150 wakaba 1.150 ## button applies to." [WF2]: I have no idea how this can be tested.
6151 wakaba 1.80
6152 wakaba 1.52 $Element->{$HTML_NS}->{button} = {
6153 wakaba 1.119 %HTMLPhrasingContentChecker, ## ISSUE: -interactive?
6154     status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6155 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6156 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
6157 wakaba 1.165 ## ISSUE: In HTML5, no "MUST NOT" for using |action|, |method|,
6158     ## |enctype|, |target|, and |novalidate| with non-|submit|-|type|
6159     ## |button| elements.
6160 wakaba 1.56 action => $HTMLURIAttrChecker,
6161 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6162 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6163 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
6164     'application/x-www-form-urlencoded' => 1,
6165     'multipart/form-data' => 1,
6166     'text/plain' => 1,
6167     }),
6168 wakaba 1.136 form => $HTMLFormAttrChecker,
6169 wakaba 1.56 method => $GetHTMLEnumeratedAttrChecker->({
6170     get => 1, post => 1, put => 1, delete => 1,
6171     }),
6172 wakaba 1.165 name => $FormControlNameAttrChecker,
6173 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
6174 wakaba 1.162 onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
6175     onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6176 wakaba 1.56 replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
6177     target => $HTMLTargetAttrChecker,
6178 wakaba 1.80 ## NOTE: According to Web Forms 2.0, |button| attribute has |template|
6179     ## attribute to support the |add| button type (as part of repetition
6180     ## template feature). It conflicts with the |template| global attribute
6181     ## introduced as part of the data template feature.
6182     ## NOTE: |template| attribute as defined in Web Forms 2.0 has no
6183     ## author requirement.
6184 wakaba 1.52 type => $GetHTMLEnumeratedAttrChecker->({
6185     button => 1, submit => 1, reset => 1,
6186     }),
6187 wakaba 1.162 value => sub {}, ## NOTE: No restriction.
6188 wakaba 1.52 }, {
6189     %HTMLAttrStatus,
6190     %HTMLM12NCommonAttrStatus,
6191     accesskey => FEATURE_M12N10_REC,
6192 wakaba 1.119 action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6193     autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6194 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6195     dataformatas => FEATURE_HTML4_REC_RESERVED,
6196     datasrc => FEATURE_HTML4_REC_RESERVED,
6197 wakaba 1.119 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6198     enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6199     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6200 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6201 wakaba 1.119 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6202     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6203 wakaba 1.162 novalidate => FEATURE_HTML5_DEFAULT,
6204 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6205     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6206 wakaba 1.126 onformchange => FEATURE_WF2_INFORMATIVE,
6207     onforminput => FEATURE_WF2_INFORMATIVE,
6208 wakaba 1.56 replace => FEATURE_WF2,
6209 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6210 wakaba 1.119 target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6211 wakaba 1.162 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO: dropped
6212 wakaba 1.119 type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6213     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6214 wakaba 1.52 }),
6215 wakaba 1.66 check_start => sub {
6216     my ($self, $item, $element_state) = @_;
6217 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6218     $self->{onerror}->(node => $item->{node},
6219     type => 'multiple labelable fae',
6220     level => $self->{level}->{must});
6221     } else {
6222     $self->{flag}->{has_labelable} = 2;
6223     }
6224 wakaba 1.162
6225     ## ISSUE: "The value attribute must not be present unless the form
6226     ## [content] attribute is present.": Wrong?
6227 wakaba 1.139
6228 wakaba 1.66 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6229     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6230 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6231     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6232 wakaba 1.138
6233     $element_state->{id_type} = 'labelable';
6234 wakaba 1.66 },
6235 wakaba 1.52 };
6236    
6237     $Element->{$HTML_NS}->{label} = {
6238 wakaba 1.139 %HTMLPhrasingContentChecker,
6239 wakaba 1.119 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC
6240     | FEATURE_XHTML2_ED,
6241 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6242 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
6243 wakaba 1.138 for => sub {
6244     my ($self, $attr) = @_;
6245    
6246     ## NOTE: MUST be an ID of a labelable element.
6247    
6248     push @{$self->{idref}}, ['labelable', $attr->value, $attr];
6249     },
6250 wakaba 1.136 form => $HTMLFormAttrChecker,
6251 wakaba 1.52 }, {
6252     %HTMLAttrStatus,
6253 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
6254 wakaba 1.56 accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
6255 wakaba 1.119 for => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6256     form => FEATURE_HTML5_DEFAULT,
6257 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6258 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6259     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6260     }),
6261 wakaba 1.139 check_start => sub { ## on entering a |label|: record the label context in $self->{flag}
6262     my ($self, $item, $element_state) = @_;
6263     $self->_add_minus_elements ($element_state, {$HTML_NS => {label => 1}}); ## presumably excludes nested |label| elements; helper not visible here
6264    
6265     $element_state->{has_label_original} = $self->{flag}->{has_label}; ## saved for |check_end|
6266     $self->{flag}->{has_label} = 1;
6267     $element_state->{has_labelable_original} = $self->{flag}->{has_labelable}; ## saved for |check_end|
6268 wakaba 1.155 $self->{flag}->{has_labelable}
6269     = $item->{node}->has_attribute_ns (undef, 'for') ? 1 : 0; ## 1: |for| present, 0: absent; labelable controls set it to 2
6270 wakaba 1.139
6271     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6272     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6273     },
6274     check_end => sub { ## on leaving a |label|: undo the flag changes made by |check_start|
6275     my ($self, $item, $element_state) = @_;
6276     $self->_remove_minus_elements ($element_state);
6277    
6278     if ($self->{flag}->{has_labelable} == 1) { # has for="" but no labelable
6279     $self->{flag}->{has_labelable}
6280     = $element_state->{has_labelable_original};
6281     } ## NOTE(review): values 0 and 2 are left in place rather than restored; confirm this is intended
6282     delete $self->{flag}->{has_label}
6283     unless $element_state->{has_label_original}; ## keep the flag if it was already set before this |label|
6284     ## TODO: Warn if no labelable descendant? <input type=hidden>?
6285    
6286     ## NOTE: |<label for=a><input id=a></label>| is non-conforming.
6287    
6288     $HTMLPhrasingContentChecker{check_end}->(@_); ## then run the shared phrasing-content end check
6289     },
6290 wakaba 1.52 ## TODO: Tests for <nest/> in <label>
6291     };
6292    
6293     $Element->{$HTML_NS}->{select} = {
6294 wakaba 1.121 %HTMLChecker,
6295 wakaba 1.163 ## ISSUE: HTML5 has no requirement like these:
6296 wakaba 1.52 ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
6297     ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
6298 wakaba 1.119 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6299 wakaba 1.56 is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
6300 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6301 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
6302 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6303 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6304 wakaba 1.56 data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
6305 wakaba 1.136 form => $HTMLFormAttrChecker,
6306 wakaba 1.52 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
6307 wakaba 1.165 name => $FormControlNameAttrChecker,
6308 wakaba 1.163 ## TODO: tests for on*
6309 wakaba 1.126 onformchange => $HTMLEventHandlerAttrChecker,
6310     onforminput => $HTMLEventHandlerAttrChecker,
6311     oninput => $HTMLEventHandlerAttrChecker,
6312 wakaba 1.56 oninvalid => $HTMLEventHandlerAttrChecker,
6313 wakaba 1.163 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6314 wakaba 1.52 }, {
6315     %HTMLAttrStatus,
6316     %HTMLM12NCommonAttrStatus,
6317 wakaba 1.56 accesskey => FEATURE_WF2,
6318 wakaba 1.119 autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6319 wakaba 1.56 data => FEATURE_WF2,
6320 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6321     dataformatas => FEATURE_HTML4_REC_RESERVED,
6322     datasrc => FEATURE_HTML4_REC_RESERVED,
6323 wakaba 1.119 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6324     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6325 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6326 wakaba 1.119 multiple => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6327     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6328 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6329     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6330 wakaba 1.126 onformchange => FEATURE_WF2_INFORMATIVE,
6331     onforminput => FEATURE_WF2_INFORMATIVE,
6332 wakaba 1.52 onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6333 wakaba 1.126 oninput => FEATURE_WF2,
6334 wakaba 1.56 oninvalid => FEATURE_WF2,
6335 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6336     sdapref => FEATURE_HTML20_RFC,
6337 wakaba 1.119 size => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6338 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6339     }),
6340 wakaba 1.66 check_start => sub {
6341     my ($self, $item, $element_state) = @_;
6342 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6343     $self->{onerror}->(node => $item->{node},
6344     type => 'multiple labelable fae',
6345     level => $self->{level}->{must});
6346     } else {
6347     $self->{flag}->{has_labelable} = 2;
6348     }
6349 wakaba 1.66
6350     $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6351     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6352 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6353     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6354 wakaba 1.138
6355     $element_state->{id_type} = 'labelable';
6356 wakaba 1.66 },
6357 wakaba 1.121 check_child_element => sub {
6358 wakaba 1.163 ## NOTE: (option | optgroup)*
6359    
6360 wakaba 1.121 my ($self, $item, $child_el, $child_nsuri, $child_ln,
6361     $child_is_transparent, $element_state) = @_;
6362 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
6363     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
6364 wakaba 1.121 $self->{onerror}->(node => $child_el,
6365     type => 'element not allowed:minus',
6366     level => $self->{level}->{must});
6367     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
6368     #
6369     } elsif ($child_nsuri eq $HTML_NS and
6370     {
6371     option => 1, optgroup => 1,
6372     }->{$child_ln}) {
6373     #
6374     } else {
6375     $self->{onerror}->(node => $child_el, type => 'element not allowed',
6376     level => $self->{level}->{must});
6377     }
6378     },
6379     check_child_text => sub {
6380     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
6381     if ($has_significant) {
6382     $self->{onerror}->(node => $child_node, type => 'character not allowed',
6383     level => $self->{level}->{must});
6384     }
6385     },
6386 wakaba 1.52 };
6387 wakaba 1.1
6388 wakaba 1.52 $Element->{$HTML_NS}->{datalist} = {
6389 wakaba 1.121 %HTMLPhrasingContentChecker,
6390 wakaba 1.119 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6391 wakaba 1.56 check_attrs => $GetHTMLAttrsChecker->({
6392     data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
6393     }, {
6394 wakaba 1.52 %HTMLAttrStatus,
6395 wakaba 1.56 data => FEATURE_WF2,
6396 wakaba 1.52 }),
6397 wakaba 1.66 check_start => sub {
6398     my ($self, $item, $element_state) = @_;
6399    
6400 wakaba 1.121 $element_state->{phase} = 'any'; # any | phrasing | option
6401    
6402 wakaba 1.66 $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6403 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6404     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6405 wakaba 1.158
6406     $element_state->{id_type} = 'datalist';
6407 wakaba 1.66 },
6408 wakaba 1.121 ## NOTE: phrasing | option*
6409     check_child_element => sub { ## validates each child element against the current |phase|
6410     my ($self, $item, $child_el, $child_nsuri, $child_ln,
6411     $child_is_transparent, $element_state) = @_;
6412 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
6413     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
6414 wakaba 1.121 $self->{onerror}->(node => $child_el,
6415     type => 'element not allowed:minus',
6416     level => $self->{level}->{must});
6417     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
6418     #
6419     } elsif ($element_state->{phase} eq 'phrasing') { ## already committed to phrasing content
6420     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
6421     #
6422     } else {
6423     $self->{onerror}->(node => $child_el,
6424     type => 'element not allowed:phrasing',
6425     level => $self->{level}->{must});
6426     }
6427     } elsif ($element_state->{phase} eq 'option') { ## already committed to |option| children only
6428     if ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
6429     #
6430     } else {
6431     $self->{onerror}->(node => $child_el,
6432     type => 'element not allowed',
6433     level => $self->{level}->{must});
6434     }
6435     } elsif ($element_state->{phase} eq 'any') { ## nothing decisive seen yet: this child picks the phase
6436     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
6437     $element_state->{phase} = 'phrasing';
6438     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
6439     $element_state->{phase} = 'option';
6440     } else {
6441     $self->{onerror}->(node => $child_el, ## phase stays |any| after a non-conforming child
6442     type => 'element not allowed',
6443     level => $self->{level}->{must});
6444     }
6445     } else {
6446     die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
6447     }
6448     },
6449     check_child_text => sub {
6450     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
6451     if ($has_significant) {
6452     if ($element_state->{phase} eq 'phrasing') {
6453     #
6454     } elsif ($element_state->{phase} eq 'any') {
6455     $element_state->{phase} = 'phrasing';
6456     } else {
6457     $self->{onerror}->(node => $child_node,
6458     type => 'character not allowed',
6459     level => $self->{level}->{must});
6460     }
6461     }
6462     },
6463     check_end => sub { ## end-of-element check; only the |phrasing| phase requires significant content
6464     my ($self, $item, $element_state) = @_;
6465     if ($element_state->{phase} eq 'phrasing') {
6466     if ($element_state->{has_significant}) {
6467     $item->{real_parent_state}->{has_significant} = 1; ## propagate to the parent's state
6468     } elsif ($item->{transparent}) {
6469     #
6470     } else {
6471     $self->{onerror}->(node => $item->{node},
6472     type => 'no significant content',
6473     level => $self->{level}->{should});
6474     }
6475     } else {
6476     ## NOTE: Since the content model explicitly allows a |datalist| element
6477     ## being empty, we don't raise "no significant content" error for this
6478     ## element when there is no element. (We should raise an error for
6479     ## |<datalist><br></datalist>|, however.)
6480     ## NOTE: As a side-effect, when the |datalist| element only contains
6481     ## non-conforming content, the |phase| flag remains |any|, so no
6482     ## "no significant content" error is raised in that case either.
6483     $HTMLChecker{check_end}->(@_);
6484     }
6485     },
6486 wakaba 1.52 };
6487 wakaba 1.49
6488 wakaba 1.52 $Element->{$HTML_NS}->{optgroup} = {
6489 wakaba 1.121 %HTMLChecker,
6490 wakaba 1.119 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6491 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6492     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6493 wakaba 1.164 label => sub {},
6494 wakaba 1.52 }, {
6495     %HTMLAttrStatus,
6496     %HTMLM12NCommonAttrStatus,
6497 wakaba 1.119 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6498     label => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6499 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6500 wakaba 1.52 }),
6501 wakaba 1.164 check_attrs2 => sub { ## reports a missing |label| attribute on the element
6502     my ($self, $item, $element_state) = @_;
6503    
6504     unless ($item->{node}->has_attribute_ns (undef, 'label')) { ## presence only; the value itself is not inspected here
6505     $self->{onerror}->(node => $item->{node},
6506     type => 'attribute missing',
6507     text => 'label',
6508     level => $self->{level}->{must});
6509     }
6510     },
6511 wakaba 1.121 check_child_element => sub {
6512     my ($self, $item, $child_el, $child_nsuri, $child_ln,
6513     $child_is_transparent, $element_state) = @_;
6514 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
6515     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
6516 wakaba 1.121 $self->{onerror}->(node => $child_el,
6517     type => 'element not allowed:minus',
6518     level => $self->{level}->{must});
6519     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
6520     #
6521     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
6522     #
6523     } else {
6524     $self->{onerror}->(node => $child_el, type => 'element not allowed',
6525     level => $self->{level}->{must});
6526     }
6527     },
6528     check_child_text => sub {
6529     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
6530     if ($has_significant) {
6531     $self->{onerror}->(node => $child_node, type => 'character not allowed',
6532     level => $self->{level}->{must});
6533     }
6534     },
6535 wakaba 1.52 };
6536    
6537     $Element->{$HTML_NS}->{option} = {
6538     %HTMLTextChecker,
6539 wakaba 1.119 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6540 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6541     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6542 wakaba 1.164 label => sub {}, ## NOTE: No restriction.
6543     selected => $GetHTMLBooleanAttrChecker->('selected'), ## ISSUE: Not a "boolean attribute"
6544     value => sub {}, ## NOTE: No restriction.
6545 wakaba 1.52 }, {
6546     %HTMLAttrStatus,
6547     %HTMLM12NCommonAttrStatus,
6548 wakaba 1.119 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6549     label => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6550 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6551 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6552     sdapref => FEATURE_HTML20_RFC,
6553 wakaba 1.119 selected => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6554     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6555 wakaba 1.52 }),
6556     };
6557 wakaba 1.49
6558 wakaba 1.52 $Element->{$HTML_NS}->{textarea} = {
6559     %HTMLTextChecker,
6560 wakaba 1.121 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6561 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6562 wakaba 1.164 accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
6563 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
6564 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6565 wakaba 1.164 cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6566 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6567 wakaba 1.136 form => $HTMLFormAttrChecker,
6568 wakaba 1.56 ## TODO: inputmode [WF2]
6569 wakaba 1.164 maxlength => sub {
6570     my ($self, $attr, $item, $element_state) = @_;
6571    
6572     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
6573    
6574 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
6575 wakaba 1.164 ## NOTE: Applying the rules for parsing non-negative integers
6576     ## results in a number.
6577     my $max_allowed_value_length = 0+$1;
6578    
6579     ## ISSUE: "The the purposes of this requirement," (typo)
6580    
6581     ## ISSUE: This constraint is applied w/o CRLF normalization to
6582     ## |value| attribute, but w/ CRLF normalization to
6583     ## concept-value.
6584     my $value = $item->{node}->text_content;
6585     if (defined $value) {
6586     my $codepoint_length = length $value;
6587    
6588     if ($codepoint_length > $max_allowed_value_length) {
6589     $self->{onerror}->(node => $item->{node},
6590     type => 'value too long',
6591     level => $self->{level}->{must});
6592     }
6593     }
6594     }
6595     },
6596 wakaba 1.165 name => $FormControlNameAttrChecker,
6597 wakaba 1.164 onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
6598     onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6599     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6600 wakaba 1.161 pattern => $PatternAttrChecker,
6601 wakaba 1.52 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6602 wakaba 1.56 required => $GetHTMLBooleanAttrChecker->('required'),
6603 wakaba 1.164 rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6604     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6605     oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
6606 wakaba 1.161 ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
6607 wakaba 1.56 wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
6608 wakaba 1.52 }, {
6609     %HTMLAttrStatus,
6610     %HTMLM12NCommonAttrStatus,
6611 wakaba 1.164 accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6612 wakaba 1.61 'accept-charset' => FEATURE_HTML2X_RFC,
6613 wakaba 1.52 accesskey => FEATURE_M12N10_REC,
6614 wakaba 1.121 autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6615     cols => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6616 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6617 wakaba 1.49 dataformatas => FEATURE_HTML4_REC_RESERVED,
6618     datasrc => FEATURE_HTML4_REC_RESERVED,
6619 wakaba 1.121 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6620     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6621 wakaba 1.164 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
6622 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6623 wakaba 1.121 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6624     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6625 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6626     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6627     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6628 wakaba 1.164 onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6629     onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6630     oninput => FEATURE_WF2, ## TODO: tests
6631     oninvalid => FEATURE_WF2, ## TODO: tests
6632 wakaba 1.52 onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6633 wakaba 1.161 pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6634 wakaba 1.121 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6635     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6636     rows => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6637 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6638     sdapref => FEATURE_HTML20_RFC,
6639 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6640 wakaba 1.121 wrap => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6641 wakaba 1.52 }),
6642 wakaba 1.66 check_start => sub {
6643     my ($self, $item, $element_state) = @_;
6644 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6645     $self->{onerror}->(node => $item->{node},
6646     type => 'multiple labelable fae',
6647     level => $self->{level}->{must});
6648     } else {
6649     $self->{flag}->{has_labelable} = 2;
6650     }
6651 wakaba 1.164
6652     $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6653     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6654     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6655    
6656     $element_state->{id_type} = 'labelable';
6657     },
6658     check_attrs2 => sub {
6659     my ($self, $item, $element_state) = @_;
6660 wakaba 1.66
6661 wakaba 1.161 if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6662     not $item->{node}->has_attribute_ns (undef, 'title')) {
6663     ## NOTE: WF2 (dropped by HTML5)
6664     $self->{onerror}->(node => $item->{node},
6665     type => 'attribute missing',
6666     text => 'title',
6667     level => $self->{level}->{should});
6668     }
6669    
6670 wakaba 1.164 unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
6671     my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
6672     if (defined $wrap) {
6673     $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
6674     if ($wrap eq 'hard') {
6675     $self->{onerror}->(node => $item->{node},
6676     type => 'attribute missing',
6677     text => 'cols',
6678     level => $self->{level}->{must});
6679     }
6680     }
6681     }
6682 wakaba 1.66 },
6683 wakaba 1.52 };
6684 wakaba 1.49
## |output| element: phrasing content with |for|, |form| and |name|
## attributes plus the WF2 form event handler attributes.
$Element->{$HTML_NS}{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |for|: "Unordered set of unique space-separated tokens".
    ## The first occurrence of each token is queued in
    ## |$self->{idref}| as an |any| reference; any later occurrence
    ## of the same token is reported as a duplicate.
    for => sub {
      my ($self, $attr) = @_;

      my %seen;
      for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
        next unless length $token;
        if ($seen{$token}) {
          $self->{onerror}->(node => $attr, type => 'duplicate token',
                             value => $token,
                             level => $self->{level}{must});
        } else {
          $seen{$token} = 1;
          push @{$self->{idref}}, ['any', $token, $attr];
        }
      }
    },
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
  }, {
    %HTMLAttrStatus,
    for => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  }),
};
6721    
## |isindex| element: empty, deprecated (HTML4).  Only |prompt| gets a
## dedicated (no-op) attribute checker.
$Element->{$HTML_NS}{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->({
    prompt => sub {}, ## NOTE: Text [M12N]
  }, {
    %HTMLAttrStatus,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record, per URI-bearing attribute name, the kind of reference.
    for my $entry ([action => 'action'],
                   [template => 'resource'],
                   [ref => 'resource']) {
      my ($attr_name, $kind) = @$entry;
      $element_state->{uri_info}{$attr_name}{type}{$kind} = 1;
    }
  },
};
6749 wakaba 1.49
## |script| element.
##
## With a |src| attribute the element has to be empty.  Without it,
## the effective script type is derived from |type| / |language|, the
## text children are accumulated in |$element_state->{text}|, and on
## end the text is passed to |onsubdoc| (or, for XML types, an
## "uncertain"-level error is raised instead).
$Element->{$HTML_NS}{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    charset => sub {
      my ($self, $attr) = @_;

      ## |charset| is reported when the owner has no |src|.
      if (not $attr->owner_element->has_attribute_ns (undef, 'src')) {
        $self->{onerror}->(type => 'attribute not allowed',
                           node => $attr,
                           level => $self->{level}{must});
      }

      $HTMLCharsetChecker->($attr->value, @_);
    },
    language => sub {}, ## NOTE: No syntax constraint according to HTML4.
    src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
  }, {
    %HTMLAttrStatus,
    async => FEATURE_HTML5_WD,
    charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    href => FEATURE_RDFA_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};

    if ($el->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type_attr = $el->get_attribute_ns (undef, 'type');
      my $language_attr = $el->get_attribute_ns (undef, 'language');

      ## An empty |type| or |language| value, or the absence of both,
      ## selects the default; an explicit |type| wins over |language|.
      my $media_type;
      if (grep { defined $_ and $_ eq '' } $type_attr, $language_attr) {
        $media_type = 'text/javascript';
      } elsif (defined $type_attr) {
        $media_type = $type_attr;
      } elsif (defined $language_attr) {
        $media_type = 'text/' . $language_attr;
      } else {
        $media_type = 'text/javascript';
      }

      ## Reduce "type/subtype; params" to a lowercased "type/subtype".
      if ($media_type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
        $media_type = "$1/$2";
        $media_type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
        ## TODO: Though we strip parameters here, they should not be
        ## ignored for the purpose of conformance checking...
      }
      $element_state->{script_type} = $media_type;
    }

    for my $attr_name (qw/src template ref/) {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }

    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      #
    } elsif ($element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:empty',
                         level => $self->{level}{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($element_state->{must_be_empty} and $has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed:empty',
                         level => $self->{level}{must});
    }
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if (not $element_state->{must_be_empty}) {
      my $media_type = $element_state->{script_type};
      if ($media_type =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML script lang',
                           text => $media_type,
                           level => $self->{level}{uncertain});
        ## ISSUE: Should we raise some kind of error for
        ## <script type="text/xml">aaaaa</script>?
        ## NOTE: ^^^ This is why we throw an "uncertain" error.
      } else {
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $media_type,
                             is_char_string => 1});
      }

      $HTMLChecker{check_end}->(@_);
    }
  },
  ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
  ## NOTE: "When used to include script data, the script data must be embedded
  ## inline, the format of the data must be given using the type attribute,
  ## and the src attribute must not be specified." - not testable.
  ## TODO: It would be possible to err <script type=text/plain src=...>
};
6874 wakaba 1.25 ## ISSUE: Significant check and text child node
6875 wakaba 1.1
6876     ## NOTE: When script is disabled.
## |noscript| element.  Inside |head| (|$self->{flag}->{in_head}|)
## only |link|, non-|scoped| |style| and |meta| with an |http-equiv|
## other than Content-Type are accepted; elsewhere the element is
## transparent and nested |noscript| is added to the minus set.
$Element->{$HTML_NS}{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    if (not $item->{node}->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript',
                         level => $self->{level}{must});
    }

    if (not $self->{flag}{in_head}) {
      $self->_add_minus_elements ($element_state,
                                  {$HTML_NS => {noscript => 1}});
    }

    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Shared reporter for children rejected by the |head| rules.
    my $reject_in_head = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:head noscript',
                         level => $self->{level}{must});
    };

    if (not $self->{flag}{in_head}) {
      $HTMLTransparentChecker{check_child_element}->(@_);
    } elsif ($self->{minus_elements}{$child_nsuri}{$child_ln} and
             $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      $reject_in_head->() if $child_el->has_attribute_ns (undef, 'scoped');
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
      my $http_equiv
          = $child_el->get_attribute_node_ns (undef, 'http-equiv');
      ## TODO: case
      if ((not $http_equiv) or
          (lc ($http_equiv->value) eq 'content-type')) {
        $reject_in_head->();
      }
    } else {
      $reject_in_head->();
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if (not $self->{flag}{in_head}) {
      $HTMLTransparentChecker{check_child_text}->(@_);
    } elsif ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    my $end_checker = $self->{flag}{in_head}
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
};
6968 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
6969 wakaba 1.1
## |event-source| element (old hyphenated name, dropped): empty
## element whose |src| attribute is checked as a URI.
$Element->{$HTML_NS}{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_LC_DROPPED,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Mark each URI-bearing attribute as a |resource| reference.
    for my $attr_name (qw/src template ref/) {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }
  },
};
6987    
## |eventsource| element: empty element whose |src| attribute is
## checked as a URI.
$Element->{$HTML_NS}{eventsource} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    src => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Mark each URI-bearing attribute as a |resource| reference.
    for my $attr_name (qw/src template ref/) {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }
  },
};
7005    
## |details| element: starts from a copy of the |fieldset| definition
## and overrides its status and attribute checker (boolean |open|).
$Element->{$HTML_NS}{details} = {
  %{ $Element->{$HTML_NS}{fieldset} },
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    { open => $GetHTMLBooleanAttrChecker->('open') },
    { %HTMLAttrStatus, open => FEATURE_HTML5_LC },
  ),
};
7016    
## |datagrid| element.
##
## The content model is tracked with |$element_state->{phase}|:
##   |any|  - initial state set by |check_start|; nothing decisive seen.
##   |flow| - entered from |any| on a flow content child element or on
##            significant text; further flow content is accepted.
##   |none| - entered from |any| on a |table|, |select| or |datalist|
##            child; every later child element or significant text is
##            an error.
## |a| and |datagrid| are added to the minus set for the duration of
## the element and removed again in |check_end|.
7017     $Element->{$HTML_NS}->{datagrid} = {
7018 wakaba 1.72 %HTMLFlowContentChecker,
7019 wakaba 1.48 status => FEATURE_HTML5_WD,
7020 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
7021 wakaba 1.1 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
7022     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
7023 wakaba 1.50 }, {
7024     %HTMLAttrStatus,
7025     disabled => FEATURE_HTML5_WD,
7026     multiple => FEATURE_HTML5_WD,
7027 wakaba 1.1 }),
7028 wakaba 1.40 check_start => sub {
7029     my ($self, $item, $element_state) = @_;
7030 wakaba 1.1
7031 wakaba 1.40 $self->_add_minus_elements ($element_state,
7032     {$HTML_NS => {a => 1, datagrid => 1}});
7033     $element_state->{phase} = 'any';
7034 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
7035     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
7036 wakaba 1.40 },
7037 wakaba 1.95 ## NOTE: Flow -(text* (table|select|datalist) Flow*) | table | select |
7038     ## datalist | Empty
7039 wakaba 1.40 check_child_element => sub {
7040     my ($self, $item, $child_el, $child_nsuri, $child_ln,
7041     $child_is_transparent, $element_state) = @_;
7042 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
7043     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
7044 wakaba 1.40 $self->{onerror}->(node => $child_el,
7045     type => 'element not allowed:minus',
7046 wakaba 1.104 level => $self->{level}->{must});
7047 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
7048     #
7049 wakaba 1.72 } elsif ($element_state->{phase} eq 'flow') {
7050     if ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
    ## In the |flow| phase a |table|, |select| or |datalist| is
    ## rejected when no child element has been recorded yet
    ## (i.e. the phase was entered through text only).
7051 wakaba 1.44 if (not $element_state->{has_element} and
7052 wakaba 1.40 $child_nsuri eq $HTML_NS and
7053 wakaba 1.95 {
7054     table => 1, select => 1, datalist => 1,
7055     }->{$child_ln}) {
7056 wakaba 1.40 $self->{onerror}->(node => $child_el,
7057 wakaba 1.104 type => 'element not allowed',
7058     level => $self->{level}->{must});
7059 wakaba 1.40 } else {
7060 wakaba 1.8 #
7061 wakaba 1.1 }
7062 wakaba 1.40 } else {
7063     $self->{onerror}->(node => $child_el,
7064 wakaba 1.121 type => 'element not allowed', ## TODO: :flow
7065 wakaba 1.104 level => $self->{level}->{must});
7066 wakaba 1.40 }
    ## Any child handled in the |flow| phase, allowed or not, counts
    ## as "an element has been seen".
7067 wakaba 1.43 $element_state->{has_element} = 1;
7068 wakaba 1.40 } elsif ($element_state->{phase} eq 'any') {
7069     if ($child_nsuri eq $HTML_NS and
7070     {table => 1, select => 1, datalist => 1}->{$child_ln}) {
7071     $element_state->{phase} = 'none';
7072 wakaba 1.72 } elsif ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
7073 wakaba 1.40 $element_state->{has_element} = 1;
7074 wakaba 1.72 $element_state->{phase} = 'flow';
7075 wakaba 1.40 } else {
7076     $self->{onerror}->(node => $child_el,
7077 wakaba 1.104 type => 'element not allowed',
7078     level => $self->{level}->{must});
7079 wakaba 1.40 }
7080     } elsif ($element_state->{phase} eq 'none') {
7081     $self->{onerror}->(node => $child_el,
7082 wakaba 1.104 type => 'element not allowed',
7083     level => $self->{level}->{must});
7084 wakaba 1.40 } else {
7085     die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
7086     }
7087     },
7088     check_child_text => sub {
7089     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only significant text affects the phase; it is accepted in
    ## |flow|, moves |any| to |flow|, and is an error otherwise.
7090     if ($has_significant) {
7091 wakaba 1.72 if ($element_state->{phase} eq 'flow') {
7092 wakaba 1.40 #
7093     } elsif ($element_state->{phase} eq 'any') {
7094 wakaba 1.72 $element_state->{phase} = 'flow';
7095 wakaba 1.40 } else {
7096     $self->{onerror}->(node => $child_node,
7097 wakaba 1.104 type => 'character not allowed',
7098     level => $self->{level}->{must});
7099 wakaba 1.1 }
7100     }
7101 wakaba 1.40 },
7102     check_end => sub {
7103     my ($self, $item, $element_state) = @_;
7104     $self->_remove_minus_elements ($element_state);
7105 wakaba 1.1
    ## In the |flow| phase, significant content is propagated to the
    ## real parent's state; otherwise a |should|-level "no significant
    ## content" error is raised unless the item is transparent.
7106 wakaba 1.95 if ($element_state->{phase} eq 'flow') {
7107     if ($element_state->{has_significant}) {
7108     $item->{real_parent_state}->{has_significant} = 1;
7109     } elsif ($item->{transparent}) {
7110     #
7111     } else {
7112     $self->{onerror}->(node => $item->{node},
7113 wakaba 1.104 type => 'no significant content',
7114 wakaba 1.110 level => $self->{level}->{should});
7115 wakaba 1.95 }
7116     } else {
7117     ## NOTE: Since the content model explicitly allows a |datagrid| element
7118     ## being empty, we don't raise "no significant content" error for this
7119     ## element when there is no element. (We should raise an error for
7120     ## |<datagrid><br></datagrid>|, however.)
7121     ## NOTE: As a side-effect, when the |datagrid| element only contains
7122     ## non-conforming content, then the |phase| flag has not changed from
7123     ## |any|, no "no significant content" error is raised either.
7124     ## NOTE: Another side-effect of the current implementation:
7125     ## |<datagrid><datagrid/></datagrid>| has no "no significant content"
7126     ## error at all.
7127 wakaba 1.40 $HTMLChecker{check_end}->(@_);
7128     }
7129     },
7130 wakaba 1.1 };
7131    
## |command| element: an empty element with boolean |checked|,
## |default| and |disabled| attributes, a URI-valued |icon|, free-form
## |label| / |radiogroup|, and an enumerated |type|.
$Element->{$HTML_NS}->{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    ## |type|: enumerated attribute with the keywords |command|,
    ## |checkbox| and |radio|.  Keywords of enumerated attributes are
    ## matched ASCII case-insensitively, so the value is folded with
    ## |tr/A-Z/a-z/| before the lookup (a copy is folded; the
    ## attribute itself is not modified).
    type => sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                           level => $self->{level}->{must});
      }
    },
  }, {
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_WD,
    disabled => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |icon| is recorded as an embedded reference; |template| and
    ## |ref| as plain resource references.
    $element_state->{uri_info}->{icon}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
7168    
## |bb| element: phrasing content; all interactive content is put in
## the minus set while the element is open.
$Element->{$HTML_NS}{bb} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1}),
  }, {
    %HTMLAttrStatus,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## NOTE: The end check is delegated to the transparent checker,
    ## not to the phrasing content checker this definition starts from.
    $HTMLTransparentChecker{check_end}->(@_);
  },
};
7192    
## |menu| element.
##
## The content is either a sequence of |li| elements or phrasing
## content, never a mix.  |$element_state->{phase}| starts as
## |li or phrasing| and is fixed to |li| or |phrasing| by the first
## decisive child (an |li| element, a phrasing content element, or
## significant text); children of the other kind are then errors.
## |$self->{flag}->{in_menu}| is set while the element is open and
## restored in |check_end|.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## ISSUE: <menu id=""><p contextmenu=""> match?  (In the current
    ## implementation, it does not match.)
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_DROPPED,
    ## NOTE: This key has to be |compact| (it used to be misspelled
    ## |compat|), so that the status applies to the |compact|
    ## attribute checked above.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';
    ## Remember the outer value so that nested |menu|s restore it.
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    $element_state->{id_type} = 'menu';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text counts as phrasing content.
    if ($has_significant) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
7285    
## |datatemplate| element: only |rule| children (plus anything in the
## plus set) are accepted; significant text is an error.
$Element->{$HTML_NS}{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      #
    } elsif (not ($child_nsuri eq $HTML_NS and $child_ln eq 'rule')) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:datatemplate',
                         level => $self->{level}{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  },
  is_xml_root => 1,
};
7316    
## |rule| element: child elements and text are not checked at all.
## While the element is open, |nest| is in the plus set and
## |$self->{flag}->{in_rule}| is set.
$Element->{$HTML_NS}{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    condition => $HTMLSelectorsAttrChecker,
    mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
  }, {
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
    ## Remember the outer value so that |check_end| can restore it.
    $element_state->{in_rule_original} = $self->{flag}{in_rule};
    $self->{flag}{in_rule} = 1;

    for my $attr_name (qw/template ref/) {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }
  },
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_plus_elements ($element_state);
    if (not $element_state->{in_rule_original}) {
      delete $self->{flag}{in_rule};
    }

    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
7352    
## |nest| element (status: HTML5 at-risk feature).  Built on the empty
## element checker; only its |filter| and |mode| attributes get
## element-specific checks.
$Element->{$HTML_NS}{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,

  check_attrs => $GetHTMLAttrsChecker->({
    filter => $HTMLSelectorsAttrChecker,

    ## |mode| must be a non-empty string containing none of the
    ## characters TAB, LF, FF, CR, or SPACE.
    mode => sub {
      my ($self, $attr) = @_;
      return if $attr->value =~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/;

      $self->{onerror}->(node => $attr, type => 'mode:syntax error',
                         level => $self->{level}->{must});
    },
  }, {
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
};
7372    
## |legend| element.
##
## The content model depends on the parent: when the parent's state has
## |in_figure| set, children and text are checked as flow content;
## otherwise they are checked as phrasing content.
$Element->{$HTML_NS}{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,

  check_attrs => $GetHTMLAttrsChecker->({
    accesskey => $HTMLAccesskeyAttrChecker,
    ## Value checking for |align| is currently disabled:
    # align => $GetHTMLEnumeratedAttrChecker->({
    #   top => 1, bottom => 1, left => 1, right => 1,
    # }),
    form => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
    form => FEATURE_HTML5_DROPPED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),

  check_child_element => sub {
    my (undef, $item) = @_;

    ## Select the checker table by context, then forward the untouched
    ## argument list to it.
    my $checker = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker
        : \%HTMLPhrasingContentChecker;
    $checker->{check_child_element}->(@_);
  },

  check_child_text => sub {
    my (undef, $item) = @_;

    my $checker = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker
        : \%HTMLPhrasingContentChecker;
    $checker->{check_child_text}->(@_);
  },

  check_start => sub {
    my ($self, undef, $element_state) = @_;

    ## Register HTML |figure| through |_add_minus_elements| while inside
    ## this element (undone in |check_end|).
    $self->_add_minus_elements ($element_state, {$HTML_NS => {figure => 1}});

    $HTMLFlowContentChecker{check_start}->(@_);
  },

  check_end => sub {
    my ($self, undef, $element_state) = @_;

    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # legend
7420 wakaba 1.1
## |div| element: flow content container.
$Element->{$HTML_NS}{div} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,

  check_attrs => $GetHTMLAttrsChecker->({
    ## |align| is an enumerated attribute with four keywords.
    align => $GetHTMLEnumeratedAttrChecker->({
      left => 1, center => 1, right => 1, justify => 1,
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),

  check_start => sub {
    my (undef, undef, $element_state) = @_;

    ## Flag the URI type of these attributes as "resource".
    for my $attr_name (qw(datasrc template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
7445    
## |center| element: deprecated flow content container with no
## element-specific attribute value checkers.
$Element->{$HTML_NS}{center} = {
  %HTMLFlowContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
7455    
## |font| element: transparent content model; dropped from HTML5 and
## deprecated in the earlier specifications.
##
## NOTE: While the |font| element was defined in the HTML5 specification,
## it was allowed only in a document with the WYSIWYG signature.  The
## checker does not check whether the signature is present, since the
## signature has been dropped, too, and has never been implemented.  (In
## addition, an "element not defined" error is raised for any |font|
## element anyway, so that no additional error has to be raised.)
$Element->{$HTML_NS}{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |size|, |color|, |face|
    },
    {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      color => FEATURE_M12N10_REC_DEPRECATED,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      face => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    },
  ),
};
7480 wakaba 1.49
## |basefont| element: deprecated empty element.
$Element->{$HTML_NS}{basefont} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: color, face, size
    },
    {
      %HTMLAttrStatus,
      color => FEATURE_M12N10_REC_DEPRECATED,
      face => FEATURE_M12N10_REC_DEPRECATED,
      ## Alternative status for |id|, kept for reference:
      #id => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
7495    
7496 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
7497     ## class title id cols rows onload onunload style(x10)
7498     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7499     ## noframes Common, lang(xhtml10)
7500    
7501 wakaba 1.100 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7502 wakaba 1.56
7503 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7504     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7505     ## xmp, listing sdapref [HTML 2.0]
7506    
7507 wakaba 1.56 =pod
7508    
7509 wakaba 1.61 HTML 2.0 nextid @n
7510    
7511     RFC 2659: CERTS CRYPTOPTS
7512    
7513     ISO-HTML: pre-html, divN
7514 wakaba 1.82
7515     XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7516     di (Common), nl (Common), handler (Common, type), standby (Common),
7517     summary (Common)
7518    
7519 wakaba 1.97 Access & XHTML2: access (LC)
7520 wakaba 1.82
7521     XML Events & XForms (for XHTML2 support; very, very low priority)
7522 wakaba 1.61
7523 wakaba 1.56 =cut
7524 wakaba 1.61
7525     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7526     ## We added them only to |a|. |link| and |form| might also allow them
7527     ## in theory.
7528 wakaba 1.1
## Set the |loaded| flag of the HTML namespace entry in the shared
## |$Whatpm::ContentChecker::Namespace| table.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

1; ## The module file must end with a true value.

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24