/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.195 - (hide annotations) (download)
Sat Aug 22 09:05:32 2009 UTC (15 years, 10 months ago) by wakaba
Branch: MAIN
Changes since 1.194: +55 -11 lines
++ whatpm/t/dom-conformance/ChangeLog	22 Aug 2009 08:39:48 -0000
	* html-flows-1.dat: Added and revised test data on |header| and
	|hgroup| elements (HTML5 revision 3039 and HTML5 revision 3040).

2009-08-22  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	22 Aug 2009 08:39:33 -0000
	* HTML.pm: Reimplemented |header| and |hgroup| elements (HTML5
	revision 3039 and HTML5 revision 3040).

2009-08-22  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.117 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6    
7 wakaba 1.1 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8    
9 wakaba 1.174 ## --- Feature Status ---
10    
sub FEATURE_HTML5_REC () {
  ## Part of HTML5, in the "implemented" status.
  ##
  ## Strictly speaking, HTML5's "implemented and widely deployed"
  ## status does not necessarily satisfy the condition for
  ## FEATURE_STATUS_REC, since there are no test cases for most of the
  ## features marked as "implemented" in HTML5.  Nevertheless, this
  ## status is special-cased as if the feature had passed the CR
  ## phase, considering HTML's history.
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}
23 wakaba 1.187
sub FEATURE_HTML5_CR () {
  ## Part of HTML5, in the "awaiting implementation feedback" status:
  ## CR status plus the "allowed" flag.
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}
sub FEATURE_HTML5_LC () {
  ## Part of HTML5, in the "last call for comments" status: LC status
  ## plus the "allowed" flag.
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}
sub FEATURE_HTML5_AT_RISK () {
  ## Part of HTML5, but in the "being considered for removal" status.
  ## Reported as WD status plus the "allowed" flag.
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}
sub FEATURE_HTML5_WD () {
  ## Part of HTML5, in the "working draft" status: WD status plus the
  ## "allowed" flag.
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}
sub FEATURE_HTML5_FD () {
  ## Part of HTML5, in the "first draft" status.  Reported as WD
  ## status plus the "allowed" flag.
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}
sub FEATURE_HTML5_DEFAULT () {
  ## Part of HTML5, but carrying no status annotation.  Reported as WD
  ## status plus the "allowed" flag.
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
      | Whatpm::ContentChecker::FEATURE_ALLOWED()
}
sub FEATURE_HTML5_DROPPED () {
  ## Was part of HTML5 in a status earlier than the last call for
  ## comments, but has since been dropped.  WD status only; the
  ## "allowed" flag is not set.
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
sub FEATURE_HTML5_LC_DROPPED () {
  ## Was part of HTML5 in the "last call for comments" status, but has
  ## since been dropped.  LC status only; the "allowed" flag is not
  ## set.
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}
65 wakaba 1.154
sub FEATURE_WF2X () {
  ## Defined in WF2 (whether deprecated there or not) and later
  ## incorporated into the HTML5 specification.  LC status only.
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}
sub FEATURE_WF2 () {
  ## Features introduced or modified by WF2 that were not merged into
  ## HTML5.  LC status only.
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}
sub FEATURE_WF2_INFORMATIVE () {
  ## Features mentioned in the informative appendix A of WF2 that
  ## were not merged into HTML5.  LC status only.
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}
81 wakaba 1.49
sub FEATURE_RDFA_REC () {
  ## Status value for RDFa features: REC status only, without the
  ## "allowed" flag.
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}
sub FEATURE_RDFA_LC_DROPPED () {
  ## A feature that was defined in an RDFa last call working draft
  ## but was subsequently dropped.  LC status only.
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}
90 wakaba 1.58
91     ## NOTE: XHTML Role LCWD has almost no information on how the |role|
92     ## attribute can be used - the only requirement in that regard is:
93     ## "the attribute MUST be referenced using its namespace-qualified form" (and
94     ## this is a host language conformance requirement!).
sub FEATURE_ROLE_LC () {
  ## Status value for the XHTML Role attribute: LC status only,
  ## without the "allowed" flag.
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}
98    
sub FEATURE_XHTML2_ED () {
  ## XHTML 2.0 Editor's Draft, in which the namespace URI is
  ## "http://www.w3.org/1999/xhtml".  WD status only.
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
104 wakaba 1.58
sub FEATURE_XHTMLBASIC11_CR () {
  ## XHTML Basic 1.1 Recommendation: features that are new there
  ## (i.e. not in XHTML M12N).  REC status only.
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  ## XHTML Basic 1.1 Recommendation: features that are new there but
  ## already deprecated.  REC status plus the "deprecated (info)"
  ## flag.
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO()
}
116    
sub FEATURE_RUBY_REC () {
  ## Status value for Ruby features.  Note that, despite the name,
  ## the value returned is the CR status.
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}
120    
sub FEATURE_M12N11_LC () {
  ## XHTML M12N 1.1 Recommendation: features that are new there (not
  ## in 1.0).  Despite the name, the value returned is the REC status.
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}
125    
126 wakaba 1.49 ## NOTE: M12N10 status is based on its abstract module definition.
127     ## It contains a number of problems. (However, again, it's a REC!)
sub FEATURE_M12N10_REC () {
  ## XHTML M12N 1.0, which did pass the CR phase and is a
  ## Recommendation.  REC status only.
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}
sub FEATURE_M12N10_REC_DEPRECATED () {
  ## XHTML M12N 1.0 features that are deprecated: REC status plus the
  ## "deprecated (info)" flag.
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO()
}
136 wakaba 1.49
137     ## NOTE: XHTML10 status is based on its transitional and frameset DTDs
138     ## (second edition). Only missing attributes from M12N10 abstract
139     ## definition are added.
sub FEATURE_XHTML10_REC () {
  ## Status value for XHTML 1.0 features.  Note that, despite the
  ## name, the value returned is the CR status.
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}
143    
144 wakaba 1.61 ## NOTE: Diff from HTML4.
sub FEATURE_ISOHTML_PREPARATION () {
  ## ISO-HTML preparation features (informative documentation).
  ## Reported as CR status only.
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}
148 wakaba 1.58
149 wakaba 1.49 ## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
150     ## 4.01). Only missing attributes from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  ## Status value for reserved HTML4 features: WD status only,
  ## without the "allowed" flag.
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
154    
155     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
156     ## rather than presentational attributes (deprecated or not deprecated).
157 wakaba 1.48
158 wakaba 1.61 ## NOTE: Diff from HTML4.
sub FEATURE_HTML32_REC_OBSOLETE () {
  ## Obsolete HTML 3.2 features: CR status plus the "deprecated
  ## (should)" flag.
  ## NOTE: The "should" here is a lowercase normative "should".
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD()
}
164    
sub FEATURE_RFC2659 () {
  ## RFC 2659, an Experimental RFC.  Reported as CR status only.
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}
168    
169     ## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
sub FEATURE_HTML2X_RFC () {
  ## HTML 2.x, a Proposed Standard that is now obsolete.  Reported as
  ## CR status only.
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}
173    
174     ## NOTE: Diff from HTML 2.0.
sub FEATURE_RFC1942 () {
  ## RFC 1942, an Experimental RFC that is now obsolete.  Reported as
  ## CR status only.
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}
178    
179     ## NOTE: Diff from HTML 3.2.
sub FEATURE_HTML20_RFC () {
  ## HTML 2.0, a Proposed Standard that is now obsolete.  Reported as
  ## CR status only.
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}
183 wakaba 1.58
184 wakaba 1.174 ## --- Content Model ---
185    
186 wakaba 1.29 ## December 2007 HTML5 Classification
187    
## Metadata content: namespace URI => {local name => 1}.
my $HTMLMetadataContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(title base link style script noscript),
      qw(event-source eventsource command datatemplate),
      ## NOTE: A |meta| element without a |name| attribute is not
      ## allowed as metadata content anywhere other than in the |head|
      ## element.
      'meta',
    ),
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
201    
## Flow content: namespace URI => {local name => 1}.
my $HTMLFlowContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(section nav article blockquote aside),
      qw(h1 h2 h3 h4 h5 h6 hgroup header),
      qw(footer address p hr dialog pre),
      qw(ol ul dl figure map table form fieldset),
      'details', ## ISSUE: "Flow element" in spec.
      'datagrid', ## ISSUE: "Flow element" in spec.
      'datatemplate',
      'div', ## ISSUE: No category in spec.
      ## NOTE: |style| is only allowed if the |scoped| attribute is
      ## specified.  Additionally, it must come before any other
      ## element or non-inter-element-whitespace text node.
      'style',

      ## These phrasing content elements are also categorized as flow
      ## content.  (|datagrid| is listed a second time here; the
      ## repetition has no effect on the resulting hash.)
      qw(br q cite em strong small mark),
      qw(dfn abbr time progress meter code),
      qw(var samp kbd sub sup span i b bdo ruby),
      qw(script noscript event-source eventsource command bb),
      qw(input button label select datalist),
      qw(textarea keygen output datagrid),
      ## NOTE: |area| is allowed only as a descendant of |map|.
      'area',

      ## Flow/phrasing content whose content model is transparent.
      qw(a ins del font),

      ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
      'menu',

      ## These embedded content elements are also categorized as flow
      ## content.
      qw(img iframe embed object video audio canvas),
    ),
  },

  ## These embedded content elements are also categorized as flow
  ## content.
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## And, non-inter-element-whitespace text nodes.
}; # $HTMLFlowContent
249 wakaba 1.29
## Sectioning content: namespace URI => {local name => 1}.
my $HTMLSectioningContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(section nav article aside),
      ## NOTE: |body| is only allowed in the |html| element.
      'body',
    ),
  },
};
257    
## Sectioning roots: namespace URI => {local name => 1}.
my $HTMLSectioningRoot = {
  $HTML_NS => {
    map { ($_ => 1) } qw(blockquote datagrid figure td)
  },
};
263    
## Heading content: namespace URI => {local name => 1}.
my $HTMLHeadingContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw(h1 h2 h3 h4 h5 h6 hgroup)
  },
};
269    
## Phrasing content: namespace URI => {local name => 1}.
my $HTMLPhrasingContent = {
  ## NOTE: All phrasing content is also flow content.
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(br q cite em strong small mark),
      qw(dfn abbr time progress meter code),
      qw(var samp kbd sub sup span i b bdo ruby),
      qw(script noscript event-source eventsource command bb),
      qw(input button label select datalist),
      qw(textarea keygen output datagrid),
      ## NOTE: |area| is allowed only as a descendant of |map|.
      'area',

      ## NOTE: Transparent.
      qw(a ins del font),

      ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
      'menu',

      ## These embedded content elements are also categorized as
      ## phrasing content.
      qw(img iframe embed object video audio canvas),
    ),
  },

  ## These embedded content elements are also categorized as phrasing
  ## content.
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## And, non-inter-element-whitespace text nodes.
}; # $HTMLPhrasingContent
302 wakaba 1.29
303 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
304 wakaba 1.29
## Interactive content: namespace URI => {local name => 1}.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      'a',
      qw(label input button select textarea),
      qw(keygen details datagrid bb),

      ## NOTE: When the "controls" attribute is specified.
      qw(video audio),

      ## NOTE: When the "type=toolbar" attribute is specified.
      'menu',
    ),
  },
};
318    
319 wakaba 1.139 ## NOTE: Labelable form-associated element.
## Labelable form-associated elements: namespace URI => {local name => 1}.
my $LabelableFAE = {
  $HTML_NS => {
    map { ($_ => 1) } qw(input button select textarea keygen)
  },
};
325    
326 wakaba 1.192 ## Check whether the labelable form-associated element is allowed to
327     ## place there or not and mark the element ID, if any, might be used
328     ## in the |for| attribute of a |label| element.
## Start-phase hook for labelable form-associated elements: records
## in the element state that the element's ID is of the "labelable"
## kind.  Arguments: ($self, $item, $element_state); only the element
## state is touched.
my $FAECheckStart = sub {
  my (undef, undef, $state) = @_;
  return $state->{id_type} = 'labelable';
}; # $FAECheckStart
## "check_attrs2"-phase hook for labelable form-associated elements.
##
## This has to run in the "check_attrs2" phase because it relies on
## the element's |id| attribute, if any, having already been recorded
## in the |$self->{id}| hash.
##
## If the |has_label| and |has_labelable| flags are not both set, the
## |has_labelable| flag is set to 2.  Otherwise a 'multiple labelable
## fae' error is reported, unless the |label_for| flag names an ID
## whose first registered attribute is owned by this very element.
my $FAECheckAttrs2 = sub {
  my ($self, $item, $element_state) = @_;

  unless ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
    $self->{flag}->{has_labelable} = 2;
    return;
  }

  my $for = $self->{flag}->{label_for};
  if (defined $for) {
    my $id_attrs = $self->{id}->{$for};
    my $target = ($id_attrs and $id_attrs->[0])
        ? $id_attrs->[0]->owner_element : undef;
    ## Even if there is an ancestor |label| element with its |for|
    ## attribute specified, there is no problem as long as the
    ## attribute value identifies THIS element.
    return if $target and $target eq $item->{node};
  }

  $self->{onerror}->(node => $item->{node},
                     type => 'multiple labelable fae',
                     level => $self->{level}->{must});
}; # $FAECheckAttrs2
365    
366 wakaba 1.130 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
367    
368 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
369     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
370    
371     ## -- Common attribute syntax checkers
372    
373 wakaba 1.1 our $AttrChecker;
374 wakaba 1.82 our $AttrStatus;
375 wakaba 1.1
## Returns a checker for an enumerated attribute.
##
## Argument: a hash reference mapping each known keyword (in
## lowercase) to a positive value if the keyword is conforming, or to
## a negative value if it is recognized but non-conforming.
##
## The returned checker takes ($self, $attr) and reports
## 'enumerated:non-conforming' or 'enumerated:invalid' via
## |$self->{onerror}| as appropriate.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;

    ## Keywords are matched ASCII case-insensitively.  |lc| must not
    ## be used here: on character strings it applies Unicode case
    ## mapping, so that e.g. U+212A KELVIN SIGN would be folded into
    ## "k" and a non-ASCII look-alike would be accepted as a keyword.
    (my $value = $attr->value) =~ tr/A-Z/a-z/;

    ## Unknown keywords are treated as state 0 (invalid).
    my $state = $states->{$value} || 0;
    if ($state > 0) {
      #
    } elsif ($state) {
      $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming',
                         level => $self->{level}->{must});
    } else {
      $self->{onerror}->(node => $attr, type => 'enumerated:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLEnumeratedAttrChecker
392    
## Returns a checker for a boolean attribute.
##
## Argument: the attribute's local name (expected in lowercase).
##
## The returned checker takes ($self, $attr) and reports
## 'boolean:invalid' via |$self->{onerror}| unless the value is the
## empty string or an ASCII case-insensitive match for the local name.
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;

    ## ASCII-only case folding.  |lc| would apply Unicode case mapping
    ## on character strings (e.g. U+212A KELVIN SIGN => "k") and so
    ## accept values that are not ASCII case-insensitive matches.
    (my $value = $attr->value) =~ tr/A-Z/a-z/;

    unless ($value eq $local_name or $value eq '') {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLBooleanAttrChecker
404    
405 wakaba 1.8 ## Unordered set of space-separated tokens
## Returns a checker for an attribute whose value is an unordered set
## of unique space-separated tokens.
##
## Argument: a hash reference of allowed tokens (token => true), or
## |undef| if any token is allowed.
##
## The returned checker takes ($self, $attr).  The first occurrence
## of a token is checked against the allowed set ('word not allowed');
## every later occurrence is reported as a 'duplicate token'.
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($allowed_words) = @_;
  return sub {
    my ($self, $attr) = @_;
    my %seen;
    my @tokens = grep {length $_}
        split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
    for my $token (@tokens) {
      if ($seen{$token}++) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
        next;
      }

      next if not defined $allowed_words or $allowed_words->{$token};

      $self->{onerror}->(node => $attr, type => 'word not allowed',
                         value => $token,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
431 wakaba 1.8
432 wakaba 1.132 ## |rel| attribute (set of space separated tokens,
433 wakaba 1.1 ## whose allowed values are defined by the section on link types)
## Checker for the |rel| attribute (a set of space-separated tokens
## whose allowed values are defined by the section on link types).
##
## Arguments: ($a_or_area, $todo, $self, $attr, $item, $element_state)
##
##   $a_or_area      Index into each link type's |effect| array; it is
##                   also tested for truth below (presumably true for
##                   |a|/|area| and false for |link| - confirm against
##                   the callers).
##   $todo           Hash in which |has_hyperlink_link_type| is set if
##                   any hyperlink link type is found.
##   $attr           The attribute node being checked.
##   $element_state  Receives |uri_info->{href}->{type}| flags and
##                   |link_rel| (hash of the tokens found).
##
## Link type definitions are taken from |$LinkType|, loaded by
## |Whatpm::_LinkTypeList|.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Collect the set of tokens.  Duplicates are errors, except for
  ## |up|, which may be repeated.
  my %word;
  for my $word (grep {length $_}
                split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    unless ($word{$word}) {
      $word{$word} = 1;
    } elsif ($word eq 'up') {
      #
    } else {
      $self->{onerror}->(node => $attr, type => 'duplicate token',
                         value => $word,
                         level => $self->{level}->{must});
    }
  }
  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  my $is_hyperlink;
  my $is_resource;
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $word (keys %word) {
    my $def = $LinkType->{$word};
    unless (defined $def) {
      $self->{onerror}->(node => $attr,
                         type => 'unknown link type',
                         value => $word,
                         level => $self->{level}->{uncertain});
      next;
    }

    ## The effect of this link type in the current context, if any.
    my $effect = $def->{effect}->[$a_or_area];

    if ($def->{status} eq 'accepted') {
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context',
                           value => $word,
                           level => $self->{level}->{must});
      }
    } elsif ($def->{status} eq 'proposal') {
      $self->{onerror}->(node => $attr,
                         type => 'link type:proposed',
                         value => $word,
                         level => $self->{level}->{should});
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context',
                           value => $word,
                           level => $self->{level}->{must});
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming',
                         value => $word,
                         level => $self->{level}->{must});
    }

    ## Link types flagged as unique may occur only once per document;
    ## occurrences are tracked in |$self->{has_link_type}|.
    if ($def->{unique}) {
      unless ($self->{has_link_type}->{$word}) {
        $self->{has_link_type}->{$word} = 1;
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate',
                           value => $word,
                           level => $self->{level}->{must});
      }
    }

    ## |alternate| is handled after the loop, since its effect depends
    ## on whether |stylesheet| is also specified.
    if (defined $effect and $word ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback link,
  ## which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
547 wakaba 1.20
548     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
549 wakaba 1.1
550     ## URI (or IRI)
## Checker for an attribute whose value is a URI (or IRI) reference.
##
## Arguments: ($self, $attr, $item, $element_state)
##
## The value is validated by |Whatpm::URIChecker|; errors are
## forwarded to |$self->{onerror}| with the attribute as the node.
## The attribute is also registered in
## |$element_state->{uri_info}->{ATTRNAME}| and that same hash is
## appended to |$self->{return}->{uri}->{VALUE}|.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $value = $attr->value;

  ## The error level table has to be passed as an argument of
  ## |check_iri_reference| (a misplaced closing parenthesis used to
  ## leave it outside of the call, so that it was silently ignored).
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
566    
567     ## A space separated list of one or more URIs (or IRIs)
## Checker for an attribute whose value is a space-separated list of
## one or more URIs (or IRIs).
##
## Arguments: ($self, $attr)
##
## Each item is validated by |Whatpm::URIChecker| (errors carry the
## item's value and its zero-based index) and is recorded in
## |$self->{return}->{uri}| with a type derived from the attribute
## name (|ping|, |profile| or |archive|).
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  my $type_of = {
    ping => 'action',
    profile => 'namespace',
    archive => 'resource',
  };
  my $type = $type_of->{$attr->name};

  my $index = 0;
  my @items = split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
  for my $item_value (@items) {
    my $report = sub {
      $self->{onerror}->(value => $item_value, @_,
                         node => $attr, index => $index);
    };
    Whatpm::URIChecker->check_iri_reference
        ($item_value, $report, $self->{level});

    ## TODO: absolute
    my $entry = {node => $attr, type => {$type => 1}};
    push @{$self->{return}->{uri}->{$item_value} ||= []}, $entry;

    $index++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## ISSUE: What is "space"?
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
595    
## Pattern for an e-mail address: a dot-atom, "@", and another
## dot-atom.  The pattern is not anchored; callers have to add
## |\A| and |\z| themselves where a full match is wanted.
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/;
  qr/$dot_atom\@$dot_atom/;
};
602    
603 wakaba 1.168 ## Valid global date and time.
## Returns a checker for a date and/or time attribute.
##
## Argument: the suffix of the |Message::Date| parser method to use;
## the method invoked is |parse_| followed by that suffix.
##
## The returned checker takes ($self, $attr, $item, $element_state).
## Parser errors other than 'date value not supported' are forwarded
## to |$self->{onerror}|.  The value stored in
## |$element_state->{date_value}->{ATTRNAME}| is the parser's result
## if it is true; otherwise the empty string if an error has been
## forwarded, or |undef| if not.
my $GetDateTimeAttrChecker = sub ($) {
  my ($type) = @_;
  my $parse_method = 'parse_' . $type;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    require Message::Date;
    my $parser = Message::Date->new;
    $parser->{level} = $self->{level};

    my $range_error;
    $parser->{onerror} = sub {
      my %opt = @_;
      return if $opt{type} eq 'date value not supported';
      $self->{onerror}->(%opt, node => $attr);
      $range_error = '';
    };

    my $parsed = $parser->$parse_method ($attr->value);
    $element_state->{date_value}->{$attr->name} = $parsed || $range_error;
  };
}; # $GetDateTimeAttrChecker
627 wakaba 1.1
## Checker for an integer attribute: an optional "-" followed by one
## or more ASCII digits.  Takes ($self, $attr) and reports
## 'integer:syntax error' otherwise.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  my $text = $attr->value;
  return if $text =~ /\A-?[0-9]+\z/;

  $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                     level => $self->{level}->{must});
}; # $HTMLIntegerAttrChecker
636    
## Returns a checker for a non-negative integer attribute.
##
## Argument: a code reference that receives the numeric value and
## returns true if it is within the allowed range.
##
## The returned checker takes ($self, $attr) and reports
## 'nninteger:syntax error' if the value is not a sequence of ASCII
## digits, or 'nninteger:out of range' if the range check fails.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my ($range_check) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $text = $attr->value;

    unless ($text =~ /\A[0-9]+\z/) {
      return $self->{onerror}->(node => $attr,
                                type => 'nninteger:syntax error',
                                level => $self->{level}->{must});
    }

    return if $range_check->($text + 0);

    $self->{onerror}->(node => $attr, type => 'nninteger:out of range',
                       level => $self->{level}->{must});
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
654    
## Returns a checker for a floating point number attribute.
##
## Argument: a code reference that receives the numeric value and
## returns true if it is within the allowed range.
##
## The returned checker takes ($self, $attr, $item, $element_state).
## It reports 'float:syntax error' or 'float:out of range'; for a
## value that passes both checks the number is stored in
## |$element_state->{number_value}->{ATTRNAME}|.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my ($range_check) = @_;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $text = $attr->value;

    my $well_formed = ($text =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                       $text =~ /\A-?\.[0-9]+\z/);
    unless ($well_formed) {
      return $self->{onerror}->(node => $attr,
                                type => 'float:syntax error',
                                level => $self->{level}->{must});
    }

    unless ($range_check->($text + 0)) {
      return $self->{onerror}->(node => $attr, type => 'float:out of range',
                                level => $self->{level}->{must});
    }

    ## TODO: parse algorithm
    $element_state->{number_value}->{$attr->name} = $text + 0;
  };

  ## TODO: scientific notation
}; # $GetHTMLFloatingPointNumberAttrChecker
678    
## Checker for the |step| attribute: a valid floating point number
## greater than zero, or an ASCII case-insensitive match for "any".
## Takes ($self, $attr) and reports 'float:out of range' or
## 'float:syntax error'.
my $StepAttrChecker = sub {
  my ($self, $attr) = @_;
  my $text = $attr->value;

  my $is_number = ($text =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                   $text =~ /\A-?\.[0-9]+\z/);
  if ($is_number) {
    return if $text > 0;
    return $self->{onerror}->(node => $attr, type => 'float:out of range',
                              level => $self->{level}->{must});
  }

  return if $text =~ /\A[Aa][Nn][Yy]\z/;

  $self->{onerror}->(node => $attr,
                     type => 'float:syntax error',
                     level => $self->{level}->{must});

  ## TODO: scientific
}; # $StepAttrChecker
701    
702 wakaba 1.86 ## HTML4 %Length;
## Checker for an HTML4 %Length; attribute: one or more ASCII digits,
## optionally followed by "%".  Takes ($self, $attr) and reports
## 'length:syntax error' otherwise.
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  my $text = $attr->value;
  return if $text =~ /\A[0-9]+%?\z/;

  $self->{onerror}->(node => $attr, type => 'length:syntax error',
                     level => $self->{level}->{must});

  ## NOTE: HTML4 definition is too vague - it does not define the syntax
  ## of percentage value at all (!).
}; # $HTMLLengthAttrChecker
714    
## Building blocks for Internet media type syntax.
my $MIMEToken = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
my $TypeOrSubtype = qr/[A-Za-z0-9!#\$&.+^_-]{1,127}/; # RFC 4288
my $IMTNoParameter = qr[($TypeOrSubtype)/($TypeOrSubtype)];

## "A valid MIME type, optionally with parameters. [RFC 2046]"
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
##
## Takes ($self, $attr).  A value that does not have the form
## |type/subtype| followed by zero or more |;name=value| parameters
## is reported as 'IMT:syntax error'.  Otherwise the type, the
## subtype and the parameter name/value pairs (quoted values being
## unescaped and unquoted) are handed to |Whatpm::IMTChecker|, whose
## errors are forwarded to |$self->{onerror}|.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
  ## ISSUE: RFC 2231 extension? Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;

  unless ($value =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#) {
    return $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                              level => $self->{level}->{must});
  }

  my @type = ($1, $2);
  my $param = $3;
  while ($param =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
    my ($name, $token, $quoted) = ($1, $2, $3);
    if (defined $token) {
      push @type, $name => $token;
    } else {
      ## Resolve backslash escapes, then strip the enclosing quotes.
      $quoted =~ s/\\(.)/$1/gs;
      push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
    }
  }

  require Whatpm::IMTChecker;
  my $ic = Whatpm::IMTChecker->new;
  $ic->{level} = $self->{level};
  $ic->check_imt (sub {
    $self->{onerror}->(@_, node => $attr);
  }, @type);
}; # $HTMLIMTAttrChecker
755    
## Attribute checker for language tag values.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## The value is validated by
## |Whatpm::LangTag->check_rfc3066_language_tag|; errors it reports are
## forwarded to |$self->{onerror}| with |node => $attr| appended.
##
## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
## ISSUE: RFC 4646 (3066bis)?
## TODO: testdata
my $HTMLLanguageTagAttrChecker = sub {
  my ($self, $attr) = @_;

  my $tag = $attr->value;
  my $forward_error = sub {
    $self->{onerror}->(@_, node => $attr);
  };

  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag
      ($tag, $forward_error, $self->{level});
}; # $HTMLLanguageTagAttrChecker
769    
## Attribute checker for "a valid media query [MQ]".
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## The value itself is not examined; the attribute is only reported as
## 'media query' at the |uncertain| level.
##
## ISSUE: What is "a valid media query"?
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;

  my @report = (node => $attr,
                type => 'media query',
                level => $self->{level}->{uncertain});
  $self->{onerror}->(@report);
}; # $HTMLMQAttrChecker
778    
## Attribute checker for event handler content attributes.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## The script is not parsed; the attribute is only reported as
## 'event handler' at the |uncertain| level.
##
## TODO: MUST contain valid ECMAScript code matching the
## ECMAScript |FunctionBody| production.  [ECMA262]
## ISSUE: MUST be ES3?  E4X?  ES4?  JS1.x?
## ISSUE: Automatic semicolon insertion does not apply?
## ISSUE: Other script languages?
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;

  my @report = (node => $attr,
                type => 'event handler',
                level => $self->{level}->{uncertain});
  $self->{onerror}->(@report);
}; # $HTMLEventHandlerAttrChecker
790    
## Attribute checker for the |form| attribute.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## NOTE: The value MUST be the ID of a |form| element.  The reference
## is not resolved here; a |['form', $value, $attr]| entry is appended
## to |$self->{idref}|.
##
## ISSUE: <form id=""><input form=""> (empty ID)?
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;

  my $reference = ['form', $attr->value, $attr];
  push @{$self->{idref}}, $reference;
}; # $HTMLFormAttrChecker
801    
## Attribute checker for the |list| attribute.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## NOTE: The value MUST be the ID of a |datalist| element.  The
## reference is not resolved here; a |['datalist', $value, $attr]|
## entry is appended to |$self->{idref}|.
##
## TODO: Warn violation to control-dependent restrictions.  For
## example, |<input type=url maxlength=10 list=a> <datalist
## id=a><option value=nonurlandtoolong></datalist>| should be
## warned.
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;

  my $reference = ['datalist', $attr->value, $attr];
  push @{$self->{idref}}, $reference;
}; # $ListAttrChecker
814    
## Attribute checker for the |pattern| attribute.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## The value is not checked here.  It is passed to |$self->{onsubdoc}|
## as a character string subdocument of the media type
## |text/x-regexp-js| whose container node is the attribute.
##
## ISSUE: "value must match the Pattern production of ECMA 262's
## grammar" - no additional constraints (e.g. {n,m} then n>=m).
##
## TODO: Warn if @value does not match @pattern.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;

  my $subdoc = {
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-regexp-js',
    is_char_string => 1,
  };
  $self->{onsubdoc}->($subdoc);
}; # $PatternAttrChecker
827    
## Attribute checker for the |accept| attribute.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## The value is compared ASCII case-insensitively and split at every
## comma (an empty value counts as a single empty token).  For each
## token:
##
##   - a token that already appeared is reported as 'duplicate token';
##   - |audio/*|, |video/*|, and |image/*| are accepted as is;
##   - a |type/subtype| without parameters is passed to
##     |Whatpm::IMTChecker->check_imt|;
##   - anything else is reported as 'IMTnp:syntax error'.
my $AcceptAttrChecker = sub {
  my ($self, $attr) = @_;

  (my $list = $attr->value) =~ tr/A-Z/a-z/; ## ASCII case-insensitive
  my @token = length $list ? split (/,/, $list, -1) : ('');

  my %seen;
  TOKEN: for my $token (@token) {
    if ($seen{$token}) {
      $self->{onerror}->(node => $attr,
                         type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
      next TOKEN;
    }
    $seen{$token} = 1;

    next TOKEN
        if $token eq 'audio/*' or $token eq 'video/*' or $token eq 'image/*';

    if ($token =~ m[\A$IMTNoParameter\z]) {
      ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
      ## define its own syntax citing RFC 4288.

      ## NOTE: Parameters not allowed.
      my ($type, $subtype) = ($1, $2);
      require Whatpm::IMTChecker;
      my $imt_checker = Whatpm::IMTChecker->new;
      $imt_checker->{level} = $self->{level};
      $imt_checker->check_imt (sub {
        $self->{onerror}->(@_, node => $attr);
      }, $type, $subtype);
      next TOKEN;
    }

    $self->{onerror}->(node => $attr,
                       type => 'IMTnp:syntax error', ## TODOC: type
                       value => $token,
                       level => $self->{level}->{must});
  } # TOKEN
}; # $AcceptAttrChecker
866    
## Attribute checker for form control |name| attributes.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## An empty value is reported as 'empty control name' at the |must|
## level.
##
## NOTE: No uniqueness constraint.
my $FormControlNameAttrChecker = sub {
  my ($self, $attr) = @_;

  return if length $attr->value;

  $self->{onerror}->(node => $attr,
                     type => 'empty control name', ## TODOC: type
                     level => $self->{level}->{must});
}; # $FormControlNameAttrChecker
878    
## Attribute checker for the |autofocus| attribute.
##
## Arguments: the checker object ($self) and the attribute node
## ($attr), followed by whatever else the caller passes; the whole
## argument list is handed to the boolean attribute checker.
##
## In addition to the boolean attribute check, a 'duplicate autofocus'
## error is reported when |$self->{has_autofocus}| has already been
## set, i.e. when this checker has run before with the same |$self|.
my $AutofocusAttrChecker = sub {
  my ($self, $attr) = @_;

  my $check_boolean = $GetHTMLBooleanAttrChecker->('autofocus');
  $check_boolean->(@_);

  $self->{onerror}->(node => $attr,
                     type => 'duplicate autofocus', ## TODOC: type
                     level => $self->{level}->{must})
      if $self->{has_autofocus};

  $self->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
891    
## Attribute checker for the |usemap| attribute, whose value MUST be
## a valid hash-name reference to a |map| element.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## A value that begins with |#| is recorded, without the |#|, as a
## |[$name, $attr]| entry of |$self->{usemap}|.  Any other value is
## reported as 'hashref:syntax error' at the |must| level.
##
## NOTE: |usemap="#"| is conforming, though it identifies no |map|
## element according to the "rules for parsing a hash-name reference"
## algorithm.  The document is non-conforming anyway, since
## |<map name="">| (empty name) is non-conforming.
## NOTE: Space characters in hash-name references are conforming.
## ISSUE: UA algorithm for matching is case-insensitive; IDs only
## different in cases should be reported
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;

  my $reference = $attr->value;
  if (index ($reference, '#') == 0) {
    my $map_name = substr ($reference, 1);
    push @{$self->{usemap}}, [$map_name => $attr];
  } else {
    $self->{onerror}->(node => $attr, type => 'hashref:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLUsemapAttrChecker
909    
## Attribute checker for a "valid browsing context name".
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## An empty value is reported as 'window name:empty' and a value
## beginning with |_| as 'window name:reserved' (with the value
## attached), both at the |must| level.  Any other value is accepted.
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($self, $attr) = @_;

  my $name = $attr->value;
  if (not length $name) {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  } elsif (substr ($name, 0, 1) eq '_') {
    $self->{onerror}->(node => $attr, type => 'window name:reserved',
                       level => $self->{level}->{must},
                       value => $name);
  }
}; # $HTMLBrowsingContextNameAttrChecker
925    
## Attribute checker for a "valid browsing context name or keyword".
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## A value beginning with |_| has to be an ASCII case-insensitive
## match for one of the keywords |_blank|, |_self|, |_parent|, and
## |_top|; otherwise it is reported as 'window name:reserved' with the
## ASCII-lowercased value attached.  An empty value is reported as
## 'window name:empty'.  Both errors are at the |must| level.  Any
## other value is accepted.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    ## Only A-Z are folded.  |lc| must not be used here, since it also
    ## maps non-ASCII characters such as U+212A KELVIN SIGN to ASCII
    ## letters and would accept e.g. "_blan\x{212A}" as |_blank|.
    $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
    unless ({
      _blank => 1, _self => 1, _parent => 1, _top => 1,
    }->{$value}) {
      $self->{onerror}->(node => $attr,
                         type => 'window name:reserved',
                         level => $self->{level}->{must},
                         value => $value);
    }
  } elsif (length $value) {
    ## An ordinary browsing context name.
  } else {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  }
}; # $HTMLTargetAttrChecker
947    
## Attribute checker for attributes that take a group of selectors.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## The value is parsed by a |Whatpm::CSS::SelectorsParser| on which
## the pseudo-classes and pseudo-elements listed below are enabled.
## Errors reported by the parser are forwarded to |$self->{onerror}|
## with |node => $attr| appended.
##
## ISSUE: Namespace resolution?
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  my $selectors = $attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  foreach my $name (qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /) {
    $parser->{pseudo_class}->{$name} = 1;
  }

  foreach my $name (qw/
    after before first-letter first-line
  /) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{level} = $self->{level};
  $parser->{onerror} = sub {
    $self->{onerror}->(@_, node => $attr);
  };

  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
975    
## Checks a character encoding name.
##
## Arguments:
##   $charset_value - The charset name to check.
##   $self          - The checker object.
##   $attr          - The attribute node errors are associated with.
##   $ascii_compat  - If true, a known charset also has to be in the
##                    ASCII-compatible category.
##
## Returns a list of the |Message::Charset::Info| entry registered for
## the name (a false value if there is none) and the ASCII-lowercased
## name.
##
## Every error carries |value => $charset_value|.  Reported types:
## 'charset:syntax error', 'charset:not preferred', and
## 'charset:not ascii compat' at the |must| level; 'charset:private'
## (for |x-| names) and 'charset:not registered' at the |good| level.
##
## NOTE: This code is used for |charset=""| attributes, |charset=|
## portion of the |content=""| attributes, and |accept-charset=""|
## attributes.
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($charset_value, $self, $attr, $ascii_compat) = @_;

  ## NOTE: Though the case-sensitivity of |charset| attribute value
  ## is not explicitly spelled in the HTML5 spec, the Character Set
  ## registry of IANA, which is referenced from HTML5 spec, says that
  ## charset name is case-insensitive.
  $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

  require Message::Charset::Info;
  my $charset = $Message::Charset::Info::IANACharset->{$charset_value};

  ## Reports an error of |$type| at the level named |$level_name|.
  my $report = sub {
    my ($type, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $charset_value,
                       level => $self->{level}->{$level_name});
  };

  ## The type used for a name that is not a registered name.
  my $unregistered_type = sub {
    return $charset_value =~ /^x-/
        ? 'charset:private' : 'charset:not registered';
  };

  ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
  ## Syntactically valid and registered?  What about x-charset names?
  unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
              ($charset_value)) {
    $report->('charset:syntax error', 'must');
  }

  if ($charset) {
    ## ISSUE: What is "the preferred name for that encoding" (for a charset
    ## with no "preferred MIME name" label)?
    my $status = $charset->{iana_names}->{$charset_value} || 0;

    my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
    $report->('charset:not preferred', 'must')
        unless ($status & $preferred) == $preferred;

    my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();
    $report->($unregistered_type->(), 'good')
        unless ($status & $registered) == $registered;

    if ($ascii_compat and
        not ($charset->{category} &
             Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ())) {
      $report->('charset:not ascii compat', 'must');
    }
  } else {
    ## TODO: non-preferred-name error for these cases.

    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    $report->($unregistered_type->(), 'good');
  }

  return ($charset, $charset_value);
}; # $HTMLCharsetChecker
1064    
## Attribute checker for a list of character encoding names.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## NOTE: "An ordered set of space-separated tokens" where "each token
## MUST be the preferred name of an ASCII-compatible character
## encoding".
##
## The value is split at runs of space characters and every non-empty
## token is passed to |$HTMLCharsetChecker| with the ASCII-compatible
## requirement turned on.
##
## ISSUE: "ordered set of space-separated tokens" is not defined.
## XXX
## ISSUE: Uniqueness is not enforced.
## ISSUE: Shift_JIS is ASCII-compatible?  What about ISO-2022-JP?
my $HTMLCharsetsAttrChecker = sub {
  my ($self, $attr) = @_;

  my @token = split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

  foreach my $name (grep {length $_} @token) {
    $HTMLCharsetChecker->($name, $self, $attr, 1);
  }
}; # $HTMLCharsetsAttrChecker
1084    
## Attribute checker for HTML4 "color" (|%Color;|) values.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## The value has to be |#| followed by one or more hexadecimal digits,
## or one of the sixteen color names, compared case-insensitively.
## Anything else is reported as 'color:syntax error' at the
## |html4_fact| level.
##
## TODO: HTML4 has some guideline on usage of color.
my $HTMLColorAttrChecker = sub {
  my ($self, $attr) = @_;

  return if $attr->value =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i;

  $self->{onerror}->(node => $attr, type => 'color:syntax error',
                     level => $self->{level}->{html4_fact});
}; # $HTMLColorAttrChecker
1099    
## Attribute checker shared by the |ref| and |template| attributes.
##
## Arguments: the checker object ($self) and the attribute node
## ($attr), followed by whatever else the caller passes; the whole
## argument list is handed to |$HTMLURIAttrChecker| first.
##
## Further checks:
##
##   - A |ref| attribute on an element without a |template| attribute
##     is reported as 'attribute not allowed'.
##   - When the value, resolved against the document URI, points into
##     the document itself and has a fragment identifier, a
##     |[$fragid, $attr]| entry is appended to |$self->{ref}| or
##     |$self->{template}|, depending on the attribute name.
##   - When it points to the document itself without a fragment
##     identifier, a |template| attribute is reported as
##     'template:not template' unless the document element is the
##     HTML |datatemplate| element.
##
## All errors are at the |must| level.
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($self, $attr) = @_;
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $attr->name;

  if ($attr_name eq 'ref' and
      not $attr->owner_element->has_attribute_ns (undef, 'template')) {
    $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                       level => $self->{level}->{must});
  }

  require Message::URL;
  my $doc = $attr->owner_document;
  my $doc_uri = $doc->document_uri;
  my $uri = Message::URL->new_abs ($attr->value, $doc_uri);
  my $no_frag_uri = $uri->clone;
  $no_frag_uri->uri_fragment (undef);

  my $is_same_document = defined $doc_uri
      ? $doc_uri eq $no_frag_uri
      : $no_frag_uri eq '';
  unless ($is_same_document) {
    ## TODO: An external document is referenced.
    ## The document MUST be an HTML or XML document.
    ## If there is a fragment identifier, it MUST point a part of the doc.
    ## If the attribute is |template|, the pointed part MUST be a
    ## |datatemplate| element.
    ## If no fragment identifier is specified, the root element MUST be
    ## a |datatemplate| element when the attribute is |template|.
    return;
  }

  my $fragid = $uri->uri_fragment;
  if (defined $fragid) {
    push @{$self->{$attr_name}}, [$fragid => $attr];
    return;
  }

  ## The document itself is referenced as a whole.
  return unless $attr_name eq 'template';

  my $docel = $doc->document_element;
  if ($docel) {
    my $nsuri = $docel->namespace_uri;
    return if defined $nsuri and $nsuri eq $HTML_NS and
        $docel->manakai_local_name eq 'datatemplate';
  }

  $self->{onerror}->(node => $attr, type => 'template:not template',
                     level => $self->{level}->{must});
}; # $HTMLRefOrTemplateAttrChecker
1152    
## Attribute checker shared by the |repeat-min|, |repeat-max|, and
## |repeat-start| attributes.
##
## Arguments: the checker object ($self) and the attribute node
## ($attr), followed by whatever else the caller passes; the whole
## argument list is handed to the non-negative integer checker.
##
## A namespaced form of the attribute is reported as 'attribute not
## allowed' (|must| level) when its owner element is in the HTML
## namespace.  The value is then checked as a non-negative integer
## with no further constraint on the number.
my $HTMLRepeatIndexAttrChecker = sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## The namespace has to be defined AND be the HTML namespace.  An
    ## |or| here would flag every namespaced owner element and compare
    ## |undef| with |eq| for owner elements in no namespace.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1167    
## Attribute checker for the |placeholder| attribute.
##
## Arguments: the checker object ($self) and the attribute node ($attr).
##
## A value containing U+000D or U+000A is reported as 'newline in
## value' at the |must| level.
my $PlaceholderAttrChecker = sub {
  my ($self, $attr) = @_;

  return unless $attr->value =~ /[\x0D\x0A]/;

  $self->{onerror}->(node => $attr,
                     type => 'newline in value', ## TODOC: type
                     level => $self->{level}->{must});
}; # $PlaceholderAttrChecker
1176    
1177 wakaba 1.1 my $HTMLAttrChecker = {
## |accesskey|: "Ordered set of unique space-separated tokens".
##
## A token that appeared before is reported as 'duplicate token'; a
## token seen for the first time has to be exactly one character long
## or it is reported as 'char:syntax error'.  Both errors carry the
## token as |value| and are at the |must| level.
accesskey => sub {
  my ($self, $attr) = @_;

  my %seen;
  for my $token (grep {length}
                 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    if ($seen{$token}) {
      $self->{onerror}->(node => $attr, type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
      next;
    }
    $seen{$token} = 1;

    next if length ($token) == 1;
    $self->{onerror}->(node => $attr, type => 'char:syntax error',
                       value => $token,
                       level => $self->{level}->{must});
  }
}, # accesskey
1201    
## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]

## |id|: The value MUST contain at least one character and no space
## character.
##
## The attribute nodes seen for each ID are collected in
## |$self->{id}->{$value}|; the second and later ones are reported as
## 'duplicate ID'.  For the first one, |$self->{id_type}->{$value}| is
## set to |$element_state->{id_type}| (or the empty string).  All
## errors are at the |must| level.
id => sub {
  my ($self, $attr, $item, $element_state) = @_;

  my $id = $attr->value;
  unless (length $id) {
    ## NOTE: MUST contain at least one character
    $self->{onerror}->(node => $attr, type => 'empty attribute value',
                       level => $self->{level}->{must});
    return;
  }

  if (my $nodes = $self->{id}->{$id}) {
    $self->{onerror}->(node => $attr, type => 'duplicate ID',
                       level => $self->{level}->{must});
    push @$nodes, $attr;
  } else {
    $self->{id}->{$id} = [$attr];
    $self->{id_type}->{$id} = $element_state->{id_type} || '';
  }

  $self->{onerror}->(node => $attr, type => 'space in ID',
                     level => $self->{level}->{must})
      if $id =~ /[\x09\x0A\x0C\x0D\x20]/;
},
title => sub {}, ## NOTE: No conformance criteria
## |lang|: The empty string or a language tag.
##
## A non-empty value is validated by
## |Whatpm::LangTag->check_rfc3066_language_tag|, whose errors are
## forwarded with |node => $attr| appended.
##
## ISSUE: RFC 4646 (3066bis)?
## TODO: test data
##
## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
## non-conforming.  Such errors are detected by the checkers of
## |{}xml:lang| and |{xml}:lang| attributes.
lang => sub {
  my ($self, $attr) = @_;

  my $tag = $attr->value;
  return if $tag eq '';

  require Whatpm::LangTag;
  my $forward_error = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  Whatpm::LangTag->check_rfc3066_language_tag
      ($tag, $forward_error, $self->{level});
},
## |dir|: An enumerated attribute with the keywords |ltr| and |rtl|.
dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
## |class|: "Unordered set of unique space-separated tokens".
##
## The attribute node is appended to
## |$self->{return}->{class}->{$token}| for each distinct token.  A
## repeated token is reported as 'duplicate token' at the |must|
## level, with the token as |value|.
class => sub {
  my ($self, $attr) = @_;

  my %seen;
  my @token = grep {length $_}
      split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
  for my $token (@token) {
    if ($seen{$token}) {
      $self->{onerror}->(node => $attr, type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
      next;
    }
    $seen{$token} = 1;
    push @{$self->{return}->{class}->{$token} ||= []}, $attr;
  }
},
## |contenteditable|: An enumerated attribute with the keywords
## |true|, |false|, and the empty string.
contenteditable => $GetHTMLEnumeratedAttrChecker->({
  true => 1, false => 1, '' => 1,
}),
## |contextmenu|: The ID of a |menu| element.  The reference is not
## resolved here; a |['menu', $value, $attr]| entry is appended to
## |$self->{idref}|.
##
## ISSUE: "The value must be the ID of a menu element in the DOM."
## What is "in the DOM"?  A menu Element node that is not part
## of the Document tree is in the DOM?  A menu Element node that
## belong to another Document tree is in the DOM?
contextmenu => sub {
  my ($self, $attr) = @_;

  my $reference = ['menu', $attr->value, $attr];
  push @{$self->{idref}}, $reference;
},
## Boolean attributes.
hidden => $GetHTMLBooleanAttrChecker->('hidden'),
irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
ref => $HTMLRefOrTemplateAttrChecker,
## |registrationmark|: Any value is conforming, but the attribute is
## only allowed in certain places.
##
## It is reported as 'attribute not allowed' (|must| level) when
## |$self->{flag}->{in_rule}| is false, and, while that flag is true,
## when the owner element is an HTML |nest| element or an HTML |rule|
## element for which |$element_state->{in_rule_original}| is false.
registrationmark => sub {
  my ($self, $attr, $item, $element_state) = @_;

  ## NOTE: Any value is conforming.

  if ($self->{flag}->{in_rule}) {
    my $el = $attr->owner_element;
    my $local_name = $el->manakai_local_name;
    return unless $local_name eq 'nest' or
        ($local_name eq 'rule' and not $element_state->{in_rule_original});

    my $nsuri = $el->namespace_uri;
    return unless defined $nsuri and $nsuri eq $HTML_NS;
  }

  $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                     level => $self->{level}->{must});
},
## |repeat|: The keyword |template| or an integer (an optional |-|
## followed by one or more ASCII digits).
##
## A namespaced form of the attribute is reported as 'attribute not
## allowed' when its owner element is in the HTML namespace.  Any
## other value than described above is reported as 'repeat:syntax
## error'.  Both errors are at the |must| level.
##
## ISSUE: "Repetition templates may occur anywhere."  Does that mean
## that the attribute MAY be specified to any element, or that the
## element with that attribute (i.e. a repetition template) can be
## inserted anywhere in a document tree?
repeat => sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## The namespace has to be defined AND be the HTML namespace.  An
    ## |or| here would flag every namespaced owner element and compare
    ## |undef| with |eq| for owner elements in no namespace.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  my $value = $attr->value;
  if ($value eq 'template') {
    #
  } elsif ($value =~ /\A-?[0-9]+\z/) {
    #
  } else {
    $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                       level => $self->{level}->{must});
  }
},
'repeat-min' => $HTMLRepeatIndexAttrChecker,
'repeat-max' => $HTMLRepeatIndexAttrChecker,
'repeat-start' => $HTMLRepeatIndexAttrChecker,
## |repeat-template|: No constraint on the value.
##
## A namespaced form of the attribute is reported as 'attribute not
## allowed' (|must| level) when its owner element is in the HTML
## namespace.
##
## ISSUE: This attribute has no conformance requirement.
## ISSUE: Repetition blocks MAY have this attribute.  Then, is the
## attribute allowed on an element that is not a repetition block?
'repeat-template' => sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## The namespace has to be defined AND be the HTML namespace.  An
    ## |or| here would flag every namespaced owner element and compare
    ## |undef| with |eq| for owner elements in no namespace.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }
},
## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
## |spellcheck|: An enumerated attribute with the keywords |true|,
## |false|, and the empty string.
spellcheck => $GetHTMLEnumeratedAttrChecker->({
  true => 1, false => 1, '' => 1,
}),
## |style|: The value is not checked here.  It is passed to
## |$self->{onsubdoc}| as a character string subdocument of the media
## type |text/x-css-inline| whose container node is the attribute.
##
## NOTE: "... MUST still be comprehensible and usable if those
## attributes were removed" is a semantic requirement, it cannot
## be tested.
style => sub {
  my ($self, $attr) = @_;

  my $subdoc = {
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-css-inline',
    is_char_string => 1,
  };
  $self->{onsubdoc}->($subdoc);
},
tabindex => $HTMLIntegerAttrChecker,
template => $HTMLRefOrTemplateAttrChecker,
## |xml:lang| in no namespace.
##
## Reports, in this order:
##
##   - 'in HTML:xml:lang' at the |info| level when the owner document
##     is an HTML document, otherwise 'in XML:xml:lang' at the
##     |html5_no_may| level;
##   - 'xml:lang not allowed' when the owner element has no |lang|
##     attribute in no namespace, or 'xml:lang ne lang' when it has
##     one whose value is not an ASCII case-insensitive match for this
##     attribute's value (both at the |must| level).
'xml:lang' => sub {
  my ($self, $attr) = @_;

  ## NOTE: In an HTML document this is not an error, but the
  ## attribute will be ignored.
  ## TODO: We need to add test for the 'in XML:xml:lang' error.
  my ($type, $level_name) = $attr->owner_document->manakai_is_html
      ? ('in HTML:xml:lang', 'info')
      : ('in XML:xml:lang', 'html5_no_may');
  $self->{onerror}->(type => $type,
                     level => $self->{level}->{$level_name},
                     node => $attr);

  my $lang_attr = $attr->owner_element->get_attribute_node_ns
      (undef, 'lang');
  unless ($lang_attr) {
    $self->{onerror}->(type => 'xml:lang not allowed',
                       level => $self->{level}->{must},
                       node => $attr);
    ## TODO: We need to add test for <x {xml}:lang {}xml:lang>.
    return;
  }

  (my $lang = $lang_attr->value) =~ tr/A-Z/a-z/; ## ASCII case-insensitive
  (my $xml_lang = $attr->value) =~ tr/A-Z/a-z/; ## ASCII case-insensitive
  $self->{onerror}->(type => 'xml:lang ne lang',
                     level => $self->{level}->{must},
                     node => $attr)
      if $lang ne $xml_lang;
},
## |xmlns| in no namespace.
##
## A value other than the HTML namespace URL is reported as 'invalid
## attribute value'; the attribute in a non-HTML document is reported
## as 'in XML:xmlns'.  Both errors are at the |must| level.  The
## value is also recorded in |$self->{return}->{uri}| as a namespace
## URL.
xmlns => sub {
  my ($self, $attr) = @_;

  my $nsurl = $attr->value;

  ## TODO: Should be new "bad namespace" error?
  $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                     level => $self->{level}->{must})
      if $nsurl ne $HTML_NS;

  ## TODO: Test
  $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                     level => $self->{level}->{must})
      if not $attr->owner_document->manakai_is_html;

  ## TODO: Should be resolved?
  my $usage = {node => $attr, type => {namespace => 1}};
  push @{$self->{return}->{uri}->{$nsurl} ||= []}, $usage;
},
1416 wakaba 1.1 };
1417    
1418 wakaba 1.79 ## ISSUE: Shouldn't the same-origin policy be applied to the datatemplate feature?
1419    
## Feature status of the attributes keyed by attribute name.  The
## values are FEATURE_* bit flags, or |0| where no status flag is
## given (|role|).
my %HTMLAttrStatus = (
  accesskey        => FEATURE_HTML5_FD,
  class            => FEATURE_HTML5_LC,
  contenteditable  => FEATURE_HTML5_REC,
  contextmenu      => FEATURE_HTML5_WD,
  dir              => FEATURE_HTML5_REC,
  draggable        => FEATURE_HTML5_LC,
  hidden           => FEATURE_HTML5_LC,
  id               => FEATURE_HTML5_REC,
  irrelevant       => FEATURE_HTML5_DROPPED,
  lang             => FEATURE_HTML5_REC,
  ref              => FEATURE_HTML5_AT_RISK,
  registrationmark => FEATURE_HTML5_AT_RISK,

  ## Repetition attributes.
  (map { ($_ => FEATURE_WF2) } qw/
    repeat repeat-max repeat-min repeat-start repeat-template
  /),

  role             => 0,
  spellcheck       => FEATURE_HTML5_WD,
  style            => FEATURE_HTML5_REC,
  tabindex         => FEATURE_HTML5_DEFAULT,
  template         => FEATURE_HTML5_AT_RISK,
  title            => FEATURE_HTML5_REC,
  xmlns            => FEATURE_HTML5_WD,
);
1446    
## Feature status of a set of common attributes, keyed by attribute
## name.  The values are FEATURE_* bit flags; the table combines
## HTML5, M12N 1.0, and RDFa flags.
my %HTMLM12NCommonAttrStatus = (
  about      => FEATURE_RDFA_REC,
  class      => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  content    => FEATURE_RDFA_REC,
  datatype   => FEATURE_RDFA_REC,
  dir        => FEATURE_HTML5_REC,
  href       => FEATURE_RDFA_REC,
  id         => FEATURE_HTML5_REC,
  instanceof => FEATURE_RDFA_LC_DROPPED,

  ## Event handler attributes.
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
    onclick ondblclick onmousedown onmouseup onmouseover
    onmousemove onmouseout onkeypress onkeydown onkeyup
  /),

  property   => FEATURE_RDFA_REC,
  rel        => FEATURE_RDFA_REC,
  resource   => FEATURE_RDFA_REC,
  rev        => FEATURE_RDFA_REC,
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_M12N10_REC,
  style      => FEATURE_HTML5_REC,
  title      => FEATURE_HTML5_REC,
  typeof     => FEATURE_RDFA_REC,
);
1476    
## Feature status of the XHTML2 common attributes, keyed by attribute
## name and grouped by attribute collection.  The values are
## FEATURE_* bit flags.
my %XHTML2CommonAttrStatus = (
  ## Core
  class  => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  id     => FEATURE_HTML5_REC,
  #xml:id
  layout => FEATURE_XHTML2_ED,
  title  => FEATURE_HTML5_REC,

  ## Hypertext
  (map { ($_ => FEATURE_XHTML2_ED) } qw/
    cite href hreflang hrefmedia hreftype nextfocus prevfocus target
  /),
  #xml:base

  ## I18N
  #xml:lang

  ## Bi-directional
  dir => FEATURE_HTML5_REC,

  ## Edit
  (map { ($_ => FEATURE_XHTML2_ED) } qw/edit datetime/),

  ## Embedding
  (map { ($_ => FEATURE_XHTML2_ED) } qw/encoding src srctype/),

  ## Image Map
  (map { ($_ => FEATURE_XHTML2_ED) } qw/usemap ismap shape coords/),

  ## Media
  media => FEATURE_XHTML2_ED,

  ## Metadata
  (map { ($_ => FEATURE_XHTML2_ED) } qw/
    about content datatype instanceof property rel resource rev
  /),

  ## Role
  role => FEATURE_XHTML2_ED,

  ## Style
  style => FEATURE_HTML5_REC,
);
1536    
## Union of %HTMLM12NCommonAttrStatus and %XHTML2CommonAttrStatus.
##
## The two tables are copied in this order, so that for an attribute
## in both tables the XHTML2 entry replaces the M12N entry; the
## entries listed after them then replace both.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  about      => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  class      => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  content    => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  datatype   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  dir        => FEATURE_HTML5_REC,
  href       => FEATURE_RDFA_REC,
  id         => FEATURE_HTML5_REC,
  instanceof => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,
  property   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  rel        => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  resource   => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  rev        => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  style      => FEATURE_HTML5_REC,
  title      => FEATURE_HTML5_REC,
  typeof     => FEATURE_RDFA_REC,
);
1559    
## Registers the event handler content attributes: each of them is
## checked by $HTMLEventHandlerAttrChecker and has the
## FEATURE_HTML5_DEFAULT status.
foreach my $handler_name (qw/
  onabort onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onscroll onselect
  onsubmit
/) {
  $HTMLAttrChecker->{$handler_name} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$handler_name} = FEATURE_HTML5_DEFAULT;
}
1571    
## Event handler content attributes that are still syntax-checked by
## |$HTMLEventHandlerAttrChecker| but carry the
## |FEATURE_HTML5_DROPPED| status.
for my $event_attr (qw/
  onbeforeunload onhashchange onresize onstorage onunload
  ondataunavailable
  onmessage
/) {
  $HTMLAttrChecker->{$event_attr} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$event_attr} = FEATURE_HTML5_DROPPED;
}
1580    
## NOTE: Non-standard global attributes in the HTML namespace.
$AttrChecker->{$HTML_NS}->{''} = sub {}; # no syntactical checks
$AttrStatus->{$HTML_NS}->{''} = 0; # disallowed and not part of any standard

$AttrStatus->{$HTML_NS}->{active} = FEATURE_HTML5_DROPPED;

## The |repeat*| attributes reuse the checkers registered for the
## null-namespace attributes of the same name.
for my $repeat_attr (qw/repeat repeat-max repeat-min repeat-start
                        repeat-template/) {
  $AttrChecker->{$HTML_NS}->{$repeat_attr} = $HTMLAttrChecker->{$repeat_attr};
  $AttrStatus->{$HTML_NS}->{$repeat_attr} = FEATURE_WF2;
}

## Status registration for the remaining attributes.  Each row is a
## status value followed by the attribute names that receive it; rows
## are applied from top to bottom.
for my $row (
  [FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
   qw/about content datatype property rel resource rev/],
  [FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED, qw/instanceof/],
  [FEATURE_RDFA_REC, qw/typeof/],
  [FEATURE_ROLE_LC, qw/role/],
  [FEATURE_XHTML2_ED,
   qw/cite coords datetime edit encoding href hreflang hrefmedia hreftype
      ismap layout media nextfocus prevfocus shape src srctype style
      target usemap/],
  [FEATURE_M12N11_LC | FEATURE_XHTML2_ED, qw/class dir id title/],
  [FEATURE_M12N11_LC,
   qw/onclick ondblclick onmousedown onmouseup onmouseover onmousemove
      onmouseout onkeypress onkeydown onkeyup/],
) {
  my ($status, @attr_names) = @$row;
  for my $attr_name (@attr_names) {
    $AttrStatus->{$HTML_NS}->{$attr_name} = $status;
  }
}
1609    
## Checker used for |data-*| attributes.  It performs no check at all.
my $HTMLDatasetAttrChecker = sub {
  ## NOTE: "Authors should ... when the attributes are ignored and
  ## any associated CSS dropped, the page is still usable." (semantic
  ## constraint.)
}; # $HTMLDatasetAttrChecker

## Feature status reported for every |data-*| attribute.
my $HTMLDatasetAttrStatus = FEATURE_HTML5_LC;
1617 wakaba 1.73
## Builds the |check_attrs| code reference for an element.
##
## Arguments:
##   1. Hash reference: local name of a null-namespace attribute =>
##      element-specific checker code reference.
##   2. Hash reference: local name of a null-namespace attribute =>
##      feature status of the attribute on this element.
##
## The returned closure takes |($self, $item, $element_state)|.  For
## every attribute of |$item->{node}| it selects a checker and a
## status, runs the checker (or reports an "unknown attribute" error),
## and hands the status to |$self->_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($specific_checker, $specific_status) = @_;
  return sub {
    my ($self, $item, $element_state) = @_;
    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' if not defined $ns;
      my $ln = $attr->manakai_local_name;

      my ($checker, $status);
      if ($ns eq '') {
        ## |data-*| attributes must not contain uppercase ASCII letters.
        my $is_dataset_attr = ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                               $ln !~ /[A-Z]/);
        if ($is_dataset_attr) {
          ($checker, $status)
              = ($HTMLDatasetAttrChecker, $HTMLDatasetAttrStatus);
        } else {
          $checker = $specific_checker->{$ln} || $HTMLAttrChecker->{$ln};
          $status = $specific_status->{$ln};
        }
      }

      ## Fall back to the per-namespace tables, and then to the
      ## namespace's default (|''|) entry.
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if length $ns and not defined $status;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($ns ne '' or $specific_status->{$ln}) {
        ## No checker is available.  A null-namespace attribute that
        ## has no element-specific status is silently accepted here;
        ## anything else is reported.
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1659    
## Base checker set for HTML elements: the entries of
## |%Whatpm::ContentChecker::AnyChecker| with |check_start| and
## |check_attrs| replaced.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Flag the |template| and |ref| entries of |uri_info| as having
    ## the |resource| type.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  ## No element-specific attribute checkers; statuses come from
  ## |%HTMLAttrStatus|.
  check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
);
1670    
## Checker set for elements that must be empty: any child element
## (other than one listed in |plus_elements|) and any significant
## text is reported as an error.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;

    ## Decide which error, if any, applies to this child.
    my $error_type;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$ns}->{$ln}) {
      $error_type = 'element not allowed:empty';
    }

    $self->{onerror}->(node => $child,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    $self->{onerror}->(node => $text_node,
                       type => 'character not allowed:empty',
                       level => $self->{level}->{must})
        if $is_significant;
  },
);
1698    
## Checker set for elements whose content is text only: any child
## element (other than one listed in |plus_elements|) is reported as
## an error.  Text handling is inherited from |%HTMLChecker|.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;

    ## Decide which error, if any, applies to this child.
    my $error_type;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$ns}->{$ln}) {
      $error_type = 'element not allowed:text';
    }

    $self->{onerror}->(node => $child, type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
);
1717    
## Checker set for elements whose content model is flow content.
##
## |$element_state->{has_non_style}| records that something other
## than a leading |style| element has been seen; a |style| child is
## only accepted before that point and only with a |scoped|
## attribute.
my %HTMLFlowContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;

    my $error_type;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly allowed in this context.
    } elsif ($ns eq $HTML_NS and $ln eq 'style') {
      my $misplaced = ($state->{has_non_style} or
                       not $child->has_attribute_ns (undef, 'scoped'));
      $error_type = 'element not allowed:flow style' if $misplaced;
    } elsif ($HTMLFlowContent->{$ns}->{$ln}) {
      ## A transparent child does not count as non-|style| content.
      $state->{has_non_style} = 1 unless $is_transparent;
    } else {
      $state->{has_non_style} = 1;
      $error_type = 'element not allowed:flow';
    }

    $self->{onerror}->(node => $child,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    $state->{has_non_style} = 1 if $is_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    ## NOTE: A modified copy of the code below is in |datagrid| checker.
    if ($state->{has_significant}) {
      ## Propagate the "has significant content" flag upwards.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
);
1766    
## Checker set for elements whose content model is phrasing content.
## The end-of-element check is shared with the flow content checker.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;

    ## Decide which error, if any, applies to this child.
    my $error_type;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $error_type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$ns}->{$ln} and
             not $HTMLPhrasingContent->{$ns}->{$ln}) {
      $error_type = 'element not allowed:phrasing';
    }

    $self->{onerror}->(node => $child,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_end => $HTMLFlowContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between flow and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
1792    
## Transparent elements are checked with a (shallow) copy of the flow
## content checker set.
my %HTMLTransparentChecker = (%HTMLFlowContentChecker);
## ISSUE: Should the significant content rule be applied to a
## transparent element with a parent?
1796 wakaba 1.40
## Element definition tables shared with |Whatpm::ContentChecker|.
our $Element;
our $ElementDefault;

## Definition used for elements in the HTML namespace that have no
## entry of their own (the |''| key).
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
1803    
## The |html| element.
##
## Its children are tracked with |$element_state->{phase}|, which
## moves from 'before head' to 'after head' to 'after body'.  A
## |head| followed by a |body| is expected; anything else, as well as
## significant text, is an error.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    version => sub {
      ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
      ## Though DTDs of various versions of HTML define the attribute
      ## as |#FIXED|, this conformance checker does no check for
      ## the attribute value, since what kind of check should be done
      ## is unknown.
    },
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    (map { ($_ => FEATURE_HTML5_REC) } qw/dir id lang/),
    manifest => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;
    $state->{phase} = 'before head';

    ## Flag the |manifest|, |template| and |ref| entries of
    ## |uri_info| as having the |resource| type.
    my $uri_info = ($state->{uri_info} ||= {});
    $uri_info->{manifest}->{type}->{resource} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly allowed in this context.
    } else {
      ## Local name of the child if it is an HTML element, or the
      ## empty string otherwise.
      my $html_name = $ns eq $HTML_NS ? $ln : '';
      my $phase = $state->{phase};
      my $not_allowed = 0;

      if ($phase eq 'before head') {
        if ($html_name eq 'head') {
          $state->{phase} = 'after head';
        } elsif ($html_name eq 'body') {
          ## |body| without a preceding |head|.
          $self->{onerror}->(node => $child,
                             type => 'ps element missing',
                             text => 'head',
                             level => $self->{level}->{must});
          $state->{phase} = 'after body';
        } else {
          $not_allowed = 1;
        }
      } elsif ($phase eq 'after head') {
        if ($html_name eq 'body') {
          $state->{phase} = 'after body';
        } else {
          $not_allowed = 1;
        }
      } elsif ($phase eq 'after body') {
        $not_allowed = 1;
      } else {
        die "check_child_element: Bad |html| phase: $state->{phase}";
      }

      $self->{onerror}->(node => $child,
                         type => 'element not allowed',
                         level => $self->{level}->{must})
          if $not_allowed;
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    $self->{onerror}->(node => $text_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $is_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;

    ## Work out which required children have not been seen.
    my $phase = $state->{phase};
    my @missing;
    if ($phase eq 'after body') {
      ## Both |head| and |body| have been seen.
    } elsif ($phase eq 'before head') {
      @missing = qw/head body/;
    } elsif ($phase eq 'after head') {
      @missing = qw/body/;
    } else {
      die "check_end: Bad |html| phase: $state->{phase}";
    }

    for my $missing_name (@missing) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => $missing_name,
                         level => $self->{level}->{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1908 wakaba 1.25
## The |head| element.
##
## Child elements must be metadata content.  At most one |title| is
## allowed and exactly one is required; a |style| child must not have
## the |scoped| attribute; significant text is not allowed.
##
## |$self->{flag}->{in_head}| is set to 1 once a child element has
## been seen and is restored to its previous value by |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        ## Second or later |title|.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{level}->{must});
    }

    ## Save the flag value that was in effect before this |head| only
    ## once.  This callback runs for every child element; saving the
    ## value again for a later child would record the 1 assigned
    ## below for an earlier sibling, and |check_end| could then no
    ## longer restore the original value.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'title',
                         level => $self->{level}->{must});
    }

    ## Restore the flag value saved by |check_child_element| (undefined
    ## if no child element was seen, as before).
    $self->{flag}->{in_head} = $element_state->{in_head_original};

    $HTMLChecker{check_end}->(@_);
  },
};
1983    
## The |title| element: text-only content, no element-specific
## attribute checkers.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    (map { ($_ => FEATURE_HTML5_REC) } qw/dir id lang/),
    sdaform => FEATURE_HTML20_RFC,
  }),
};
1997 wakaba 1.1
## The |base| element: an empty element of which only one is allowed
## per checked document, and which needs at least one of the |href|
## and |target| attributes.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    ## Reports a must-level error of the given type on this element.
    my $report = sub {
      $self->{onerror}->(node => $node,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    ## Second or later |base| element.
    if ($self->{has_base}) {
      $report->('element not allowed:base');
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $node->has_attribute_ns (undef, 'href');
    my $has_target = $node->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $report->('basehref after URL attribute');
    }
    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $report->('basetarget after hyperlink');
    }

    unless ($has_href or $has_target) {
      $report->('attribute missing:href|target');
    }

    $element_state->{uri_info}->{href}->{type}->{base} = 1;

    ## The generic attribute checker is built on every call, so it
    ## sees the contents of |%HTMLAttrStatus| at call time.
    my $check_attrs = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    });
    return $check_attrs->($self, $item, $element_state);
  },
};
2062    
## The |link| element: an empty element that requires the |href| and
## |rel| attributes.  |sizes| is only allowed together with the
## |icon| link type, and an alternate style sheet needs a non-empty
## |title|.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    ## Set by the |sizes| checker below when the attribute is present.
    my $sizes_attr;

    my $check_attrs = $GetHTMLAttrsChecker->({
      charset => sub {
        my ($checker_self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      sizes => sub {
        my ($self, $attr) = @_;
        $sizes_attr = $attr;

        ## Space-separated, unique tokens; each is |any| or
        ## |<width>x<height>| with no leading zeros.
        my %seen;
        for my $token (grep {length $_}
                       split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
          if ($seen{$token}) {
            $self->{onerror}->(node => $attr, type => 'duplicate token',
                               value => $token,
                               level => $self->{level}->{must});
            next;
          }
          $seen{$token} = 1;

          unless ($token eq 'any' or
                  $token =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/) {
            $self->{onerror}->(node => $attr,
                               type => 'sizes:syntax error',
                               value => $token,
                               level => $self->{level}->{must});
          }
        }
      },
      target => $HTMLTargetAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics,
      ## syntactically same as the |title| as global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
          ## NOTE: |charset| attribute had been part of HTML5 spec though
          ## it had been commented out.
      href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      methods => FEATURE_HTML20_RFC,
      rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      sizes => FEATURE_HTML5_LC,
      target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      # title: HTML5_WD | HTML5_LC | ...
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    });
    $check_attrs->($self, $item, $element_state);

    ## Reports a missing required attribute on the element itself.
    my $report_missing = sub {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing',
                         text => $_[0],
                         level => $self->{level}->{must});
    };

    if ($node->has_attribute_ns (undef, 'href')) {
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    } else {
      $report_missing->('href');
    }

    $report_missing->('rel') unless $node->has_attribute_ns (undef, 'rel');

    if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
      $self->{onerror}->(node => $sizes_attr,
                         type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }

    ## An alternate style sheet must have a non-empty |title|.
    if ($element_state->{link_rel}->{alternate} and
        $element_state->{link_rel}->{stylesheet}) {
      my $title_attr = $node->get_attribute_node_ns (undef, 'title');
      if (not $title_attr) {
        $report_missing->('title');
      } elsif ($title_attr->value eq '') {
        $self->{onerror}->(node => $title_attr,
                           type => 'empty style sheet title',
                           level => $self->{level}->{must});
      }
    }
  },
};
2167    
2168     $Element->{$HTML_NS}->{meta} = {
2169 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2170 wakaba 1.40 %HTMLEmptyChecker,
2171     check_attrs => sub {
2172     my ($self, $item, $element_state) = @_;
2173 wakaba 1.1 my $name_attr;
2174     my $http_equiv_attr;
2175     my $charset_attr;
2176     my $content_attr;
2177 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2178 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2179     $attr_ns = '' unless defined $attr_ns;
2180     my $attr_ln = $attr->manakai_local_name;
2181     my $checker;
2182 wakaba 1.73 my $status;
2183 wakaba 1.1 if ($attr_ns eq '') {
2184 wakaba 1.73 $status = {
2185     %HTMLAttrStatus,
2186 wakaba 1.82 %XHTML2CommonAttrStatus,
2187 wakaba 1.153 charset => FEATURE_HTML5_WD,
2188     content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2189 wakaba 1.187 dir => FEATURE_HTML5_REC,
2190 wakaba 1.153 'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2191 wakaba 1.187 id => FEATURE_HTML5_REC,
2192     lang => FEATURE_HTML5_REC,
2193 wakaba 1.153 name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2194 wakaba 1.73 scheme => FEATURE_M12N10_REC,
2195     }->{$attr_ln};
2196    
2197 wakaba 1.1 if ($attr_ln eq 'content') {
2198     $content_attr = $attr;
2199     $checker = 1;
2200     } elsif ($attr_ln eq 'name') {
2201     $name_attr = $attr;
2202     $checker = 1;
2203     } elsif ($attr_ln eq 'http-equiv') {
2204     $http_equiv_attr = $attr;
2205     $checker = 1;
2206     } elsif ($attr_ln eq 'charset') {
2207     $charset_attr = $attr;
2208     $checker = 1;
2209 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
2210 wakaba 1.71 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2211 wakaba 1.67 $checker = sub {};
2212 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2213     $attr_ln !~ /[A-Z]/) {
2214 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
2215     $status = $HTMLDatasetAttrStatus;
2216 wakaba 1.1 } else {
2217     $checker = $HTMLAttrChecker->{$attr_ln}
2218 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
2219 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
2220     }
2221     } else {
2222     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2223 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
2224     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2225     || $AttrStatus->{$attr_ns}->{''};
2226     $status = FEATURE_ALLOWED if not defined $status;
2227 wakaba 1.1 }
2228 wakaba 1.62
2229 wakaba 1.1 if ($checker) {
2230 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
2231 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
2232 wakaba 1.54 #
2233 wakaba 1.1 } else {
2234 wakaba 1.104 $self->{onerror}->(node => $attr,
2235     type => 'unknown attribute',
2236     level => $self->{level}->{uncertain});
2237 wakaba 1.49 ## ISSUE: No conformance criteria for unknown attributes in the spec
2238     }
2239    
2240 wakaba 1.82 $self->_attr_status_info ($attr, $status);
2241 wakaba 1.1 }
2242    
2243     if (defined $name_attr) {
2244     if (defined $http_equiv_attr) {
2245     $self->{onerror}->(node => $http_equiv_attr,
2246 wakaba 1.104 type => 'attribute not allowed',
2247     level => $self->{level}->{must});
2248 wakaba 1.1 } elsif (defined $charset_attr) {
2249     $self->{onerror}->(node => $charset_attr,
2250 wakaba 1.104 type => 'attribute not allowed',
2251     level => $self->{level}->{must});
2252 wakaba 1.1 }
2253     my $metadata_name = $name_attr->value;
2254     my $metadata_value;
2255     if (defined $content_attr) {
2256     $metadata_value = $content_attr->value;
2257     } else {
2258 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2259 wakaba 1.104 type => 'attribute missing',
2260     text => 'content',
2261     level => $self->{level}->{must});
2262 wakaba 1.1 $metadata_value = '';
2263     }
2264     } elsif (defined $http_equiv_attr) {
2265     if (defined $charset_attr) {
2266     $self->{onerror}->(node => $charset_attr,
2267 wakaba 1.104 type => 'attribute not allowed',
2268     level => $self->{level}->{must});
2269 wakaba 1.1 }
2270     unless (defined $content_attr) {
2271 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2272 wakaba 1.104 type => 'attribute missing',
2273     text => 'content',
2274     level => $self->{level}->{must});
2275 wakaba 1.1 }
2276     } elsif (defined $charset_attr) {
2277     if (defined $content_attr) {
2278     $self->{onerror}->(node => $content_attr,
2279 wakaba 1.104 type => 'attribute not allowed',
2280     level => $self->{level}->{must});
2281 wakaba 1.1 }
2282     } else {
2283     if (defined $content_attr) {
2284     $self->{onerror}->(node => $content_attr,
2285 wakaba 1.104 type => 'attribute not allowed',
2286     level => $self->{level}->{must});
2287 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2288 wakaba 1.104 type => 'attribute missing:name|http-equiv',
2289     level => $self->{level}->{must});
2290 wakaba 1.1 } else {
2291 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2292 wakaba 1.104 type => 'attribute missing:name|http-equiv|charset',
2293     level => $self->{level}->{must});
2294 wakaba 1.1 }
2295     }
2296    
2297 wakaba 1.32 my $check_charset_decl = sub () {
2298 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
2299 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2300     for my $el (@{$parent->child_nodes}) {
2301     next unless $el->node_type == 1; # ELEMENT_NODE
2302 wakaba 1.40 unless ($el eq $item->{node}) {
2303 wakaba 1.29 ## NOTE: Not the first child element.
2304 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2305 wakaba 1.32 type => 'element not allowed:meta charset',
2306 wakaba 1.104 level => $self->{level}->{must});
2307 wakaba 1.29 }
2308     last;
2309     ## NOTE: Entity references are not supported.
2310     }
2311     } else {
2312 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2313 wakaba 1.32 type => 'element not allowed:meta charset',
2314 wakaba 1.104 level => $self->{level}->{must});
2315 wakaba 1.29 }
2316 wakaba 1.32 }; # $check_charset_decl
2317 wakaba 1.21
2318 wakaba 1.32 my $check_charset = sub ($$) {
2319     my ($attr, $charset_value) = @_;
2320 wakaba 1.21
2321 wakaba 1.91 my $charset;
2322     ($charset, $charset_value)
2323     = $HTMLCharsetChecker->($charset_value, $self, $attr);
2324    
2325 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
2326 wakaba 1.21 if (defined $ic) {
2327     ## TODO: Test for this case
2328     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2329     if ($charset ne $ic_charset) {
2330 wakaba 1.32 $self->{onerror}->(node => $attr,
2331 wakaba 1.104 type => 'mismatched charset name',
2332 wakaba 1.106 text => $ic,
2333 wakaba 1.104 value => $charset_value,
2334     level => $self->{level}->{must});
2335 wakaba 1.21 }
2336     } else {
2337     ## NOTE: MUST, but not checkable, since the document is not originally
2338     ## in serialized form (or the parser does not preserve the input
2339     ## encoding information).
2340 wakaba 1.32 $self->{onerror}->(node => $attr,
2341 wakaba 1.104 type => 'mismatched charset name not checked',
2342     value => $charset_value,
2343     level => $self->{level}->{uncertain});
2344 wakaba 1.21 }
2345    
2346 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
2347     $self->{onerror}->(node => $attr,
2348 wakaba 1.104 type => 'charref in charset',
2349     level => $self->{level}->{must},
2350     layer => 'syntax');
2351 wakaba 1.22 }
2352 wakaba 1.32 }; # $check_charset
2353    
2354     ## TODO: metadata conformance
2355    
2356     ## TODO: pragma conformance
2357     if (defined $http_equiv_attr) { ## An enumerated attribute
2358     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
2359 wakaba 1.33
2360 wakaba 1.85 if ($self->{has_http_equiv}->{$keyword}) {
2361     $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2362     node => $http_equiv_attr,
2363 wakaba 1.104 level => $self->{level}->{must});
2364 wakaba 1.85 } else {
2365     $self->{has_http_equiv}->{$keyword} = 1;
2366     }
2367    
2368     if ($keyword eq 'content-type') {
2369 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2370 wakaba 1.33
2371 wakaba 1.32 $check_charset_decl->();
2372 wakaba 1.182
2373     unless ($item->{node}->owner_document->manakai_is_html) {
2374     $self->{onerror}->(node => $item->{node},
2375     type => 'in XML:charset',
2376     level => $self->{level}->{must});
2377     }
2378    
2379 wakaba 1.32 if ($content_attr) {
2380     my $content = $content_attr->value;
2381 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2382 wakaba 1.132 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2383 wakaba 1.58 =(.+)\z!sx) {
2384 wakaba 1.32 $check_charset->($content_attr, $1);
2385     } else {
2386     $self->{onerror}->(node => $content_attr,
2387     type => 'meta content-type syntax error',
2388 wakaba 1.104 level => $self->{level}->{must});
2389 wakaba 1.85 }
2390     }
2391     } elsif ($keyword eq 'default-style') {
2392     ## ISSUE: Not defined yet in the spec.
2393     } elsif ($keyword eq 'refresh') {
2394     if ($content_attr) {
2395     my $content = $content_attr->value;
2396     if ($content =~ /\A[0-9]+\z/) {
2397     ## NOTE: Valid non-negative integer.
2398     #
2399 wakaba 1.132 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2400 wakaba 1.85 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
2401     Whatpm::URIChecker->check_iri_reference ($content, sub {
2402 wakaba 1.104 $self->{onerror}->(value => $content, @_, node => $content_attr);
2403 wakaba 1.106 }, $self->{level});
2404 wakaba 1.85 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URIs".
2405    
2406     $element_state->{uri_info}->{content}->{node} = $content_attr;
2407     $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2408     ## TODO: absolute
2409     push @{$self->{return}->{uri}->{$content} ||= []},
2410     $element_state->{uri_info}->{content};
2411     } else {
2412     $self->{onerror}->(node => $content_attr,
2413     type => 'refresh:syntax error',
2414 wakaba 1.104 level => $self->{level}->{must});
2415 wakaba 1.32 }
2416     }
2417     } else {
2418     $self->{onerror}->(node => $http_equiv_attr,
2419 wakaba 1.104 type => 'enumerated:invalid',
2420     level => $self->{level}->{must});
2421 wakaba 1.32 }
2422     }
2423    
2424     if (defined $charset_attr) {
2425 wakaba 1.182 my $value = $charset_attr->value;
2426    
2427 wakaba 1.32 $check_charset_decl->();
2428 wakaba 1.182 $check_charset->($charset_attr, $value);
2429    
2430     if (not $item->{node}->owner_document->manakai_is_html and
2431     not $value =~ /\A[Uu][Tt][Ff]-8\z/) {
2432     $self->{onerror}->(node => $item->{node},
2433     type => 'in XML:charset',
2434     level => $self->{level}->{must});
2435     }
2436 wakaba 1.1 }
2437     },
2438     };
2439    
## Definition of the |style| element.
##
## The styling language is taken from the |type| attribute (defaulting
## to |text/css|).  Child elements are only tolerated when the language
## is not |text/css|; text content is accumulated and handed to the
## |onsubdoc| callback at the end of the element.
$Element->{$HTML_NS}{style} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    dir => FEATURE_HTML5_REC,
    disabled => FEATURE_XHTML2_ED,
    href => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_FD,
    title => FEATURE_HTML5_REC,
    type => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $media_type = $item->{node}->get_attribute_ns(undef, 'type');
    $media_type = 'text/css' if not defined $media_type;

    ## Normalize to a bare lowercase "type/subtype"; anything that does
    ## not match (including a value with parameters) becomes undef.
    if ($media_type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      $media_type = "$1/$2";
      $media_type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
    } else {
      ## NOTE: We don't know how parameters are handled by UAs.  According to
      ## HTML5 specification, <style> with unknown parameters in |type=""|
      ## must be ignored.
      undef $media_type;
    }

    ## Child elements are forbidden only for |text/css|; an invalid
    ## |type=""| (undef) or an unknown language allows them.
    ## NOTE: There is no definition for "XML-based styling language" in
    ## HTML5, so |+xml| types are not special-cased here.
    $state->{allow_element}
        = (defined $media_type and $media_type eq 'text/css') ? 0 : 1;
    $state->{style_type} = $media_type;

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;

    $state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif (not $self->{plus_elements}{$child_nsuri}{$child_ln} and
             not $state->{allow_element}) {
      ## Neither explicitly permitted nor allowed by the styling language.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    ## Collect the style sheet source for |check_end|.
    $state->{text} .= $text_node->data;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    my $style_type = $state->{style_type};

    ## NOTE: An undefined type means invalid |type=""|; nothing to check.
    if (defined $style_type) {
      if ($style_type ne 'text/css' and
          $style_type =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.  However, we don't know of any
        ## XML-based styling language that can be used in HTML <style>
        ## element, such that we throw a "style language not supported"
        ## error.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML style lang',
                           text => $style_type,
                           level => $self->{level}{uncertain});
      } else {
        ## |text/css| and any other non-XML language are passed on as a
        ## subdocument.
        ## NOTE: Should we raise some kind of error for,
        ## say, <style type="text/plaion">?
        $self->{onsubdoc}->({s => $state->{text},
                             container_node => $item->{node},
                             media_type => $style_type,
                             is_char_string => 1});
      }
    }

    $HTMLChecker{check_end}->(@_);
  },
};
2545 wakaba 1.25 ## ISSUE: Relationship to significant content check?
2546 wakaba 1.1
## Definition of the |body| element: flow content, the legacy
## presentational attributes and the event handler attributes listed
## below.
$Element->{$HTML_NS}{body} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    alink => $HTMLColorAttrChecker,
    background => $HTMLURIAttrChecker,
    bgcolor => $HTMLColorAttrChecker,
    link => $HTMLColorAttrChecker,
    ## Every event handler attribute shares the same checker.
    (map { ($_ => $HTMLEventHandlerAttrChecker) } qw/
      onafterprint onbeforeprint onbeforeunload onblur onerror onfocus
      onhashchange onload onmessage onoffline ononline onpopstate
      onredo onresize onstorage onundo onunload
    /),
    text => $HTMLColorAttrChecker,
    vlink => $HTMLColorAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    alink => FEATURE_M12N10_REC_DEPRECATED,
    background => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_REC,
    link => FEATURE_M12N10_REC_DEPRECATED,
    ## Event handler attributes with the HTML5 LC status only.
    (map { ($_ => FEATURE_HTML5_LC) } qw/
      onafterprint onbeforeprint onbeforeunload onblur onerror onfocus
      onhashchange onmessage onoffline ononline onpopstate
      onredo onresize onstorage onundo
    /),
    ## |onload| and |onunload| additionally carry the M12N 1.0 status.
    onload => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    onunload => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    text => FEATURE_M12N10_REC_DEPRECATED,
    vlink => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## Classify the URI-bearing attributes of this element.
    my $uri_info = $state->{uri_info} ||= {};
    $uri_info->{background}{type}{embedded} = 1;
    $uri_info->{template}{type}{resource} = 1;
    $uri_info->{ref}{type}{resource} = 1;
  },
};
2610    
## Definition of the |section| element: flow content plus a |cite|
## attribute whose status is marked as dropped from HTML5.
$Element->{$HTML_NS}{section} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      cite => FEATURE_HTML5_DROPPED | FEATURE_XHTML2_ED,
    },
  ),
};
2622    
## Definition of the |nav| element: plain flow content.
## NOTE: |status| is listed before the shared checker entries, as in
## the other definitions that use this order.
$Element->{$HTML_NS}{nav} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
}; # nav
2627    
## Definition of the |article| element: flow content with the |cite|
## (status: dropped) and |pubdate| attributes.
$Element->{$HTML_NS}{article} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      pubdate => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      cite => FEATURE_HTML5_DROPPED,
      pubdate => FEATURE_HTML5_LC,
    },
  ),
}; # article
2640 wakaba 1.1
## Definition of the |blockquote| element: flow content with a |cite|
## URI attribute.
$Element->{$HTML_NS}{blockquote} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLFlowContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      cite => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## Classify the URI-bearing attributes of this element.
    my $uri_info = $state->{uri_info} ||= {};
    $uri_info->{cite}{type}{cite} = 1;
    $uri_info->{template}{type}{resource} = 1;
    $uri_info->{ref}{type}{resource} = 1;
  },
}; # blockquote
2662    
## Definition of the |aside| element: plain flow content.
$Element->{$HTML_NS}{aside} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
}; # aside
2667    
## Definition of the |h1| element: phrasing content.  Its start sets
## the checker-wide |has_hn| flag, which the |header| definition reads.
$Element->{$HTML_NS}{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        left => 1, center => 1, right => 1, justify => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## Record that a heading has been seen.
    $self->{flag}{has_hn} = 1;

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
}; # h1
2690    
## |h2| to |h6| are defined as shallow copies of the |h1| definition;
## each element gets its own top-level hash.
for my $heading_name (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}{$heading_name} = {%{$Element->{$HTML_NS}{h1}}};
}
2700 wakaba 1.1
2701 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
2702 wakaba 1.174
## Definition of the |hgroup| element: only |h1|-|h6| children are
## allowed, significant text is an error, and at least one heading
## must have been recorded by the time the element ends.
$Element->{$HTML_NS}{hgroup} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state, $state2) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
      ## A forbidden heading is still recorded as a heading.
      $state2->{has_hn} = 1
          if $child_nsuri eq $HTML_NS and $child_ln =~ /\Ah[1-6]\z/;
    } elsif (not $self->{plus_elements}{$child_nsuri}{$child_ln}) {
      if ($child_nsuri eq $HTML_NS and $child_ln =~ /\Ah[1-6]\z/) {
        ## NOTE: Use $state2 instead of $state here so that the |h2|
        ## element in |<hgroup><ins><h2>| is not counted as an |h2| of
        ## the |hgroup| element.
        $state2->{has_hn} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}{must});
      }
    }
  }, # check_child_element
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    ## Only significant text is reported.
    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  }, # check_child_text
  check_end => sub {
    my ($self, $item, $state) = @_;
    if (not $state->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn',
                         level => $self->{level}{must});
    }

    $HTMLChecker{check_end}->(@_);
  }, # check_end
}; # hgroup
2747 wakaba 1.29
## Definition of the |header| element: flow content, with |header| and
## |footer| passed to |_add_minus_elements| for its duration
## (presumably marking them as forbidden -- confirm against that
## method).  A missing heading is reported at the |warn| level.
$Element->{$HTML_NS}{header} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC,
  check_start => sub {
    my ($self, $item, $state) = @_;
    $self->_add_minus_elements($state,
                               {$HTML_NS => {header => 1, footer => 1}});

    ## Stash the outer |has_hn| flag and start counting afresh for the
    ## content of this element.
    $state->{has_hn_original} = $self->{flag}{has_hn};
    $self->{flag}{has_hn} = 0;

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  }, # check_start
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->_remove_minus_elements($state);

    if (not $self->{flag}{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn',
                         level => $self->{level}{warn});
      ## No heading inside: fall back to the flag value saved at the
      ## start of the element.
      $self->{flag}{has_hn} = $state->{has_hn_original};
    }

    $HTMLFlowContentChecker{check_end}->(@_);
  }, # check_end
}; # header
2774 wakaba 1.1
## Definition of the |footer| element: flow content, with |header|,
## |footer|, sectioning content and heading content passed to
## |_add_minus_elements| for its duration.
$Element->{$HTML_NS}{footer} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $state) = @_;
    my @minus_sets = (
      {$HTML_NS => {header => 1, footer => 1}},
      $HTMLSectioningContent,
      $HTMLHeadingContent,
    );
    $self->_add_minus_elements($state, @minus_sets);

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    ## Undo what |check_start| registered before the generic end check.
    $self->_remove_minus_elements($state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # footer
2795    
## Definition of the |address| element: flow content, with |header|,
## |footer|, |address|, sectioning content and heading content passed
## to |_add_minus_elements| for its duration.
$Element->{$HTML_NS}{address} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: add test
    #align => $GetHTMLEnumeratedAttrChecker->({
    #  left => 1, center => 1, right => 1, justify => 1,
    #}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;
    my @minus_sets = (
      {$HTML_NS => {header => 1, footer => 1, address => 1}},
      $HTMLSectioningContent,
      $HTMLHeadingContent,
    );
    $self->_add_minus_elements($state, @minus_sets);

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    ## Undo what |check_start| registered before the generic end check.
    $self->_remove_minus_elements($state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # address
2829    
## Definition of the |p| element: phrasing content with the deprecated
## enumerated |align| attribute.
$Element->{$HTML_NS}{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        left => 1, center => 1, right => 1, justify => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # p
2845    
## Definition of the |hr| element: an empty element.  No
## element-specific attribute checkers are registered yet; only the
## status table below is.
$Element->{$HTML_NS}{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |align|, |noshade|, |size|, |width|
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
      noshade => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      size => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
}; # hr
2862    
## Definition of the |br| element: an empty element with the
## deprecated enumerated |clear| attribute.
$Element->{$HTML_NS}{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      clear => $GetHTMLEnumeratedAttrChecker->({
        left => 1, all => 1, right => 1, none => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      clear => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_REC,
      sdapref => FEATURE_HTML20_RFC,
      style => FEATURE_HTML5_REC,
      title => FEATURE_HTML5_REC,
    },
  ),
  ## NOTE: Blank line MUST NOT be used for presentation purpose.
  ## (This requirement is semantic so that we cannot check.)
}; # br
2882    
## Definition of the |dialog| element: children must alternate
## |dt|, |dd|, |dt|, |dd|, ...  The position in that sequence is kept
## in |$state->{phase}|, which is either 'before dt' or 'before dd'.
$Element->{$HTML_NS}{dialog} = {
  status => FEATURE_HTML5_WD,
  %HTMLChecker,
  check_start => sub {
    my ($self, $item, $state) = @_;
    $state->{phase} = 'before dt';

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;

    ## For each phase: the element expected next, the element that
    ## indicates a missing sibling, and the phase after the expected one.
    my %rule_for = (
      'before dt' => {want => 'dt', other => 'dd', next => 'before dd'},
      'before dd' => {want => 'dd', other => 'dt', next => 'before dt'},
    );

    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif (not $self->{plus_elements}{$child_nsuri}{$child_ln}) {
      my $phase = $state->{phase};
      my $rule = $rule_for{defined $phase ? $phase : ''};
      die "check_child_element: Bad |dialog| phase: $state->{phase}"
          unless $rule;

      if ($child_nsuri eq $HTML_NS and $child_ln eq $rule->{want}) {
        $state->{phase} = $rule->{next};
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq $rule->{other}) {
        ## Out of sequence: report the missing preceding sibling.  The
        ## phase is left as it was.
        $self->{onerror}->(node => $child_el, type => 'ps element missing',
                           text => $rule->{want},
                           level => $self->{level}{must});
        $state->{phase} = $phase;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    ## Only significant text is reported.
    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    ## Ending while a |dd| is still expected means the last |dt| has no
    ## matching |dd|.
    if ($state->{phase} eq 'before dd') {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'dd',
                         level => $self->{level}{must});
    }

    $HTMLChecker{check_end}->(@_);
  },
}; # dialog
2952    
## Definition of the |pre| element: phrasing content with the
## deprecated |width| attribute.  When the |class| attribute value
## contains |idl| or |idl-code|, the text content is additionally
## handed to the |onsubdoc| callback as |text/x-webidl|.
$Element->{$HTML_NS}->{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## TODO: Flag to enable/disable IDL checking?
    my $class = $item->{node}->get_attribute_ns (undef, 'class');
    ## The attribute value is undef when |class| is not specified;
    ## guard against matching undef against the pattern.
    if (defined $class and
        $class =~ /\bidl(?>-code)?\b/) { ## TODO: use classList.has
      ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
      ## NOTE: pre.code > code.idl-code: WebIDL spec
      ## NOTE: pre.idl-code: DOM1 spec
      ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
      ## NOTE: pre.schema: ReSpec-generated specs
      $self->{onsubdoc}->({s => $item->{node}->text_content,
                           container_node => $item->{node},
                           media_type => 'text/x-webidl',
                           is_char_string => 1});
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
}; # pre
2985    
## Definition of the |ol| element: only |li| children are allowed and
## significant text is an error.  The |ul| definition copies this hash.
$Element->{$HTML_NS}{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    reversed => $GetHTMLBooleanAttrChecker->('reversed'),
    start => $HTMLIntegerAttrChecker,
    ## TODO: HTML4 |type|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_REC,
    reversed => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    #start => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    start => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif (not $self->{plus_elements}{$child_nsuri}{$child_ln} and
             not ($child_nsuri eq $HTML_NS and $child_ln eq 'li')) {
      ## Neither explicitly permitted nor an |li| element.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    ## Only significant text is reported.
    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  },
}; # ol
3031    
## |ul| element.  The definition starts as a copy of every entry of the
## |ol| definition; |status| and |check_attrs| are then replaced.
{
  ## Attribute-specific value checkers.
  my %attr_checker = (
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## TODO: HTML4 |type|
    ## TODO: sdaform, align
  );

  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  );

  $Element->{$HTML_NS}->{ul} = {
    %{$Element->{$HTML_NS}->{ol}},
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3049    
## |dir| element.  The definition starts as a copy of every entry of the
## |ul| definition; |status| and |check_attrs| are then replaced.
## TODO: %block; is not allowed [HTML4]
## TODO: Empty list allowed?
{
  ## Attribute-specific value checkers.
  my %attr_checker = (
    compact => $GetHTMLBooleanAttrChecker->('compact'),
  );

  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{dir} = {
    %{$Element->{$HTML_NS}->{ul}},
    status => FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3066    
## |li| element.  Uses the shared %HTMLFlowContentChecker hooks, but the
## child hooks switch to the %HTMLPhrasingContentChecker ones while the
## |in_menu| flag is set on the checker object.
{
  ## Checker for the |value| attribute: reports 'non-ol li value' unless
  ## the parent of the owner element is an HTML |ol| element, then
  ## validates the value with $HTMLIntegerAttrChecker.
  my $check_value = sub {
    my ($self, $attr) = @_;

    my $in_ol = 0;
    my $li_parent = $attr->owner_element->manakai_parent_element;
    if (defined $li_parent) {
      my $ns = $li_parent->namespace_uri;
      my $ln = $li_parent->manakai_local_name;
      $in_ol = 1 if defined $ns and $ns eq $HTML_NS and $ln eq 'ol';
    }

    if (not $in_ol) {
      ## ISSUE: No "MUST" in the spec.
      $self->{onerror}->(node => $attr,
                         type => 'non-ol li value',
                         level => $self->{level}->{html5_fact});
    }

    $HTMLIntegerAttrChecker->($self, $attr);
  };

  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    type => FEATURE_M12N10_REC_DEPRECATED,
    #value => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
    #    FEATURE_M12N10_REC_DEPRECATED,
    value => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
        FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
  );

  $Element->{$HTML_NS}->{li} = {
    %HTMLFlowContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({
      ## TODO: HTML4 |type|
      value => $check_value,
    }, \%attr_status),
    check_child_element => sub {
      my ($self) = @_;
      ## TODO: In <dir> element, then ...
      my $hooks = $self->{flag}->{in_menu}
          ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
      $hooks->{check_child_element}->(@_);
    },
    check_child_text => sub {
      my ($self) = @_;
      ## TODO: In <dir> element, then ...
      my $hooks = $self->{flag}->{in_menu}
          ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
      $hooks->{check_child_text}->(@_);
    },
  };
}
3125    
## |dl| element.  Children are tracked with a small state machine kept in
## |$element_state->{phase}|, whose values are 'before dt', 'in dts' and
## 'in dds'.
{
  ## Attribute-specific value checkers.
  my %attr_checker = (
    compact => $GetHTMLBooleanAttrChecker->('compact'),
  );

  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  );

  $Element->{$HTML_NS}->{dl} = {
    %HTMLChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($self, $item, $element_state) = @_;
      $element_state->{phase} = 'before dt';

      for my $attr_name (qw/template ref/) {
        $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
      }
    },
    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
          $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{level}->{must});
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        ## Explicitly allowed here; nothing to check.
      } else {
        my $phase = $element_state->{phase};
        my $known_phase = grep { $phase eq $_ }
            'in dds', 'in dts', 'before dt';
        die "check_child_element: Bad |dl| phase: $element_state->{phase}"
            unless $known_phase;

        my $in_html = $child_nsuri eq $HTML_NS;
        if ($in_html and $child_ln eq 'dt') {
          ## A |dt| is acceptable in every phase.
          $element_state->{phase} = 'in dts';
        } elsif ($in_html and $child_ln eq 'dd') {
          ## A |dd| needs at least one preceding |dt|.
          if ($phase eq 'before dt') {
            $self->{onerror}->(node => $child_el,
                               type => 'ps element missing',
                               text => 'dt',
                               level => $self->{level}->{must});
          }
          $element_state->{phase} = 'in dds';
        } else {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed',
                             level => $self->{level}->{must});
        }
      }
    },
    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;
      ## Only insignificant text is accepted directly in |dl|.
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must})
          if $has_significant;
    },
    check_end => sub {
      my ($self, $item, $element_state) = @_;
      ## Ending while still in a run of |dt|s means the last group has
      ## no |dd|.
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'dd',
                         level => $self->{level}->{must})
          if $element_state->{phase} eq 'in dts';

      $HTMLChecker{check_end}->(@_);
    },
  };
}
3211    
## |dt| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{dt} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3222    
## |dd| element.  Uses the shared %HTMLFlowContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{dd} = {
    %HTMLFlowContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3233    
## |a| element.  Uses the shared %HTMLTransparentChecker hooks with a
## hand-written attribute checker, because several attributes are allowed
## only when |href| is also specified.
$Element->{$HTML_NS}->{a} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,

  ## check_attrs ($self, $item, $element_state)
  ##
  ## Checks every attribute of the element, reports attributes that
  ## require |href| when |href| is absent, and records in
  ## |$self->{flag}->{in_a_href}| whether we are inside a hyperlink.
  ## The previous flag value is saved in
  ## |$element_state->{in_a_href_original}| for |check_end|.
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Both lookup tables are built once per call instead of once per
    ## attribute: they do not depend on the attribute being checked.
    ## Attribute names are unique within an element, so each checker
    ## is still used at most once per call.
    my $local_status = {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_WD,
      rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    };
    my $local_checker = {
      charset => sub {
        my ($self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      ## TODO: HTML4 |coords|
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      ## This closure needs |$item|, which is why the table is built
      ## per call rather than once at definition time.
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      ## TODO: HTML4 |shape|
      media => $HTMLMQAttrChecker,
      ## TODO: HTML4/XHTML1 |name|
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    ## Null-namespace attributes that have an |a|-specific checker,
    ## keyed by local name; consulted after the loop.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $status = $local_status->{$attr_ln};
        $checker = $local_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      ## Fall back to the namespace-wide tables, then to the
      ## namespace's catch-all ('') entry.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        ## No checker and no status: nothing is reported here.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## Without |href|, these attributes are not allowed.
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },

  ## check_start ($self, $item, $element_state)
  ##
  ## Registers $HTMLInteractiveContent through |_add_minus_elements| and
  ## marks the |template| and |ref| URI attributes as resource references.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },

  ## check_end ($self, $item, $element_state)
  ##
  ## Undoes |check_start| and |check_attrs|: calls
  ## |_remove_minus_elements| and clears the |in_a_href| flag unless it
  ## was already set when this element started.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLTransparentChecker{check_end}->(@_);
  },
};
3356    
## |q| element.  Uses the shared %HTMLPhrasingContentChecker hooks, with
## a URI checker for the |cite| attribute.
$Element->{$HTML_NS}->{q} = {
  %HTMLPhrasingContentChecker,
  ## |status| is listed after the shared hooks, as in the other element
  ## definitions of this file, so that an entry of the same name in the
  ## shared table (if there is one) can never replace it.
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    cite => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    sdapref => FEATURE_HTML2X_RFC,
    sdasuff => FEATURE_HTML2X_RFC,
  }),
  ## check_start ($self, $item, $element_state)
  ##
  ## Records how the URI attributes of this element are used: |cite| as
  ## a citation, |template| and |ref| as resource references.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
## TODO: "Quotation punctuation (such as quotation marks), if any, must be
## placed inside the <code>q</code> element."  Though we cannot test the
## element against this requirement, since it includes a semantic aspect,
## it might be possible to report the existence of quotation marks OUTSIDE
## the |q| element.
3383 wakaba 1.1
## |cite| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{cite} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3394    
## |em| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{em} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3405    
## |strong| element.  Uses the shared %HTMLPhrasingContentChecker hooks;
## no attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{strong} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3416    
## |small| element.  Uses the shared %HTMLPhrasingContentChecker hooks;
## no attribute has an element-specific value checker.
{
  ## Feature status of each attribute; the explicit |lang| entry comes
  ## after the shared tables so that it wins over a shared |lang| entry.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  );

  $Element->{$HTML_NS}->{small} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3426    
## |big| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; the explicit |lang| entry comes
  ## after the shared tables so that it wins over a shared |lang| entry.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  );

  $Element->{$HTML_NS}->{big} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3436    
## |mark| element.  Uses the shared %HTMLPhrasingContentChecker hooks
## unchanged, including whatever attribute checker that table provides.
$Element->{$HTML_NS}->{mark} = {
  %HTMLPhrasingContentChecker,
  ## |status| is listed after the shared hooks, as in the other element
  ## definitions of this file, so that an entry of the same name in the
  ## shared table (if there is one) can never replace it.
  status => FEATURE_HTML5_WD,
};
3441    
## |dfn| element.  Uses the shared %HTMLPhrasingContentChecker hooks,
## forbids nested |dfn| elements, and records the term being defined in
## |$self->{term}|.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  }),

  ## check_start ($self, $item, $element_state)
  ##
  ## Determines the defined term and appends the element node to the
  ## list |$self->{term}->{$term}|.  The term is, in order of
  ## preference:
  ##
  ##   1. the value of the |title| attribute of the |dfn| element;
  ##   2. the value of the |title| attribute of an HTML |abbr| child,
  ##      when that child is the only element child and every text
  ##      child is inter-element whitespace;
  ##   3. the text content of the |dfn| element.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      ## Whether an element child has been seen, regardless of its type.
      ## Testing |defined $term| instead is not enough: when the first
      ## element child is not an |abbr| with |title|, a later |abbr|
      ## would wrongly supply the term although there is more than one
      ## element child.
      my $has_element_child = 0;
      for my $child (@{$node->child_nodes}) {
        my $child_type = $child->node_type;
        if ($child_type == 1) { # ELEMENT_NODE
          if ($has_element_child) {
            ## Second element child: rule 2 does not apply.
            undef $term;
            last;
          }
          $has_element_child = 1;
          if ($child->manakai_local_name eq 'abbr') {
            my $nsuri = $child->namespace_uri;
            if (defined $nsuri and $nsuri eq $HTML_NS) {
              my $attr = $child->get_attribute_node_ns (undef, 'title');
              $term = $attr->value if $attr;
            }
          }
        } elsif ($child_type == 3 or $child_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next if $child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/;
              # Inter-element whitespace
          ## Any other text: rule 2 does not apply.
          undef $term;
          last;
        }
      }
      $term = $node->text_content unless defined $term;
    }
    push @{$self->{term}->{$term}}, $node;
    ## ISSUE: The HTML5 definition for the defined term does not work with
    ## |ruby| unless |dfn| has |title|.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },

  ## check_end ($self, $item, $element_state)
  ##
  ## Undoes the |_add_minus_elements| call of |check_start|, then runs
  ## the shared end-of-element check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3502    
## |abbr| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
##
## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
## number (plural vs singular) must match the grammatical number of the
## contents of the element."  Though this can be checked by machine,
## it requires language-specific knowledge and a dictionary, so we do
## not support checking this requirement.
## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    full => FEATURE_XHTML2_ED,
    lang => FEATURE_HTML5_REC,
  );

  $Element->{$HTML_NS}->{abbr} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3519    
## |acronym| element.  Uses the shared %HTMLPhrasingContentChecker hooks;
## no attribute has an element-specific value checker.
{
  ## Feature status of each attribute; the explicit |lang| entry comes
  ## after the shared tables so that it wins over a shared |lang| entry.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  );

  $Element->{$HTML_NS}->{acronym} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3529    
## |time| element.  Uses the shared %HTMLPhrasingContentChecker hooks.
## The date or time string is validated in |check_end|; it is taken from
## the |datetime| attribute if present, otherwise from the text content.
$Element->{$HTML_NS}->{time} = {
  %HTMLPhrasingContentChecker,
  ## |status| is listed after the shared hooks, as in the other element
  ## definitions of this file, so that an entry of the same name in the
  ## shared table (if there is one) can never replace it.
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # checked in |check_end|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datetime => FEATURE_HTML5_FD,
  }),
  ## TODO: Update definition
  ## TODO: Write tests

  ## check_end ($self, $item, $element_state)
  ##
  ## Parses the date or time string and reports, at the "must" level,
  ## syntax errors and out-of-range fields.  Two forms are accepted:
  ## a date and time with optional time zone, and a time only.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;       # string to be parsed
    my $reg_sp;      # pattern for the white space allowed in $input
    my $input_node;  # node errors are reported against
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09\x0A\x0C\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{WhiteSpace}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    ## Reports one error of the given type on the input node.
    my $report = sub {
      my ($type) = @_;
      $self->{onerror}->(node => $input_node,
                         type => $type,
                         level => $self->{level}->{must});
    };

    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      ## Copy the captures at once.  Group 1 is the year in the
      ## date-and-time form and the hour in the time-only form.
      my ($first, $month, $day, $dt_hour, $dt_minute, $dt_second,
          $tz_hour, $tz_minute, $t_minute, $t_second)
          = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);

      my $hour;
      my $minute;
      my $second;
      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        my $year = $first;
        if (length ($year) != 4 or length ($month) != 2 or
            length ($day) != 2 or
            length ($dt_hour) != 2 or length ($dt_minute) != 2) {
          $report->('dateortime:syntax error');
        }

        if (1 <= $month and $month <= 12) {
          ## Days per month, February counted as in a leap year; the
          ## second test rejects 29 February of non-leap years.
          my $last_day
              = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$month];
          $report->('datetime:bad day')
              if $day < 1 or $day > $last_day;
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and
                  not ($year % 400 == 0 or
                       ($year % 4 == 0 and $year % 100 != 0));
        } else {
          $report->('datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          if (length ($tz_hour) != 2 or length ($tz_minute) != 2) {
            $report->('dateortime:syntax error');
          }

          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        if (length ($first) != 2 or length ($t_minute) != 2) {
          $report->('dateortime:syntax error');
        }

        ($hour, $minute, $second) = ($first, $t_minute, $t_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: Integer part of second don't have to have length of two.

        ## The pattern tolerates a missing integer part so that it can
        ## be reported here as a syntax error.
        if (substr ($second, 0, 1) eq '.') {
          $report->('dateortime:syntax error');
        }

        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3678    
## |meter| element.  Uses the shared %HTMLPhrasingContentChecker hooks.
## Each of the six numeric attributes accepts any floating-point number:
## the range callback always returns true.
## TODO: "The recommended way of giving the value is to include it as
## contents of the element"
## TODO: value inequalities (HTML5 revision 1463)
## TODO: content checking
## TODO: content or value must contain number (rev 2053)
{
  my @number_attrs = qw/value min low high max optimum/;

  ## One separately created checker per attribute.
  my %attr_checker = map {
    ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }));
  } @number_attrs;

  ## Feature status of each attribute; explicit entries come after the
  ## shared table so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    map { ($_ => FEATURE_HTML5_DEFAULT) } @number_attrs,
  );

  $Element->{$HTML_NS}->{meter} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3702    
## |progress| element.  Uses the shared %HTMLPhrasingContentChecker
## hooks.  The range callbacks accept a |value| that is zero or greater
## and a |max| that is greater than zero.
## TODO: recommended to use content
{
  ## Attribute-specific value checkers.
  my %attr_checker = (
    value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
    max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
  );

  ## Feature status of each attribute; explicit entries come after the
  ## shared table so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    max => FEATURE_HTML5_DEFAULT,
    value => FEATURE_HTML5_DEFAULT,
  );

  $Element->{$HTML_NS}->{progress} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
3715    
## |code| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{code} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3726    
## |var| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{var} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3737    
## |samp| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{samp} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3748    
## |kbd| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{kbd} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3759    
## |sub| and |sup| elements.  Both names refer to one and the same
## definition hash, which uses the shared %HTMLPhrasingContentChecker
## hooks; no attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdapref => FEATURE_HTML2X_RFC,
  );

  my $definition = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };

  $Element->{$HTML_NS}->{$_} = $definition for qw/sub sup/;
}
3772 wakaba 1.1
## |span| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML2X_RFC,
  );

  $Element->{$HTML_NS}->{span} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3786    
## |i| and |b| elements.  Both names refer to one and the same
## definition hash, which uses the shared %HTMLPhrasingContentChecker
## hooks; no attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  my $definition = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };

  $Element->{$HTML_NS}->{$_} = $definition for qw/i b/;
}
3799    
## |tt| element.  Uses the shared %HTMLPhrasingContentChecker hooks; no
## attribute has an element-specific value checker.
{
  ## Feature status of each attribute; explicit entries come after the
  ## shared tables so that they win over same-named shared entries.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  );

  $Element->{$HTML_NS}->{tt} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
}
3810 wakaba 1.51
## |s|, |strike| and |u| elements.  All three names refer to one and the
## same definition hash, which uses the shared
## %HTMLPhrasingContentChecker hooks; no attribute has an
## element-specific value checker.
{
  ## Feature status of each attribute; the explicit |lang| entry comes
  ## after the shared tables so that it wins over a shared |lang| entry.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  );

  my $definition = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_M12N10_REC_DEPRECATED,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };

  $Element->{$HTML_NS}->{$_} = $definition for qw/s strike u/;
}
3824    
## |bdo| element: phrasing content.  In addition to the ordinary
## attribute checks, the element is required to have a |dir| attribute.
$Element->{$HTML_NS}->{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## No element-specific attribute checkers; only the status table
    ## differs from the defaults.
    my $check_listed_attrs = $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      (map {; $_ => FEATURE_HTML5_REC } qw(dir id style title lang)),
      (map {; $_ => FEATURE_HTML2X_RFC } qw(sdapref sdasuff)),
    });
    $check_listed_attrs->($self, $item, $element_state);

    ## A missing |dir| (in the null namespace) is a MUST-level error
    ## reported against the element itself.
    unless ($item->{node}->has_attribute_ns (undef, 'dir')) {
      my $bdo_el = $item->{node};
      $self->{onerror}->(node => $bdo_el,
                         type => 'attribute missing',
                         text => 'dir',
                         level => $self->{level}->{must});
    }
  },
  ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
};
3850    
{
  ## --- |ruby| element ---
  ##
  ## Content model (see the NOTE on |check_child_element|):
  ##   (phrasing, (rt | (rp, rt, rp)))+
  ##
  ## The checker is a state machine over |$element_state->{phase}|:
  ##
  ##   before-rb    nothing seen yet
  ##   in-rb        inside a ruby base (phrasing content)
  ##   after-rt     after an |rt| that was not preceded by |rp|
  ##   after-rp1    after the opening |rp|
  ##   after-rp-rt  after |rp|, |rt|
  ##   after-rp2    after the closing |rp|
  ##
  ## |$element_state->{has_significant}| is the significance flag of
  ## the current segment; |$element_state->{has_sig}| remembers that an
  ## earlier segment had the flag set, so that |check_end| can still
  ## propagate it to the parent.
  ##
  ## The helpers below are lexically scoped to this block.

  ## SHOULD-level: |rt|/|rp| appears without significant content
  ## before it.
  my $report_no_base = sub {
    my ($self, $node) = @_;
    $self->{onerror}->(node => $node,
                       level => $self->{level}->{should},
                       type => 'no significant content before');
  };

  ## MUST-level: an element that is neither phrasing content nor
  ## |rt|/|rp| appears where a ruby base is expected.
  my $report_bad_base = sub {
    my ($self, $node) = @_;
    $self->{onerror}->(node => $node,
                       type => 'element not allowed:ruby base',
                       level => $self->{level}->{must});
  };

  ## MUST-level: element |$expected_ln| should have preceded |$node|.
  my $report_missing_before = sub {
    my ($self, $node, $expected_ln) = @_;
    $self->{onerror}->(node => $node,
                       type => 'ps element missing',
                       text => $expected_ln,
                       level => $self->{level}->{must});
  };

  ## MUST-level: element |$expected_ln| is missing at the end of
  ## the |ruby| element |$node|.
  my $report_missing_at_end = sub {
    my ($self, $node, $expected_ln) = @_;
    $self->{onerror}->(node => $node,
                       type => 'element missing',
                       text => $expected_ln,
                       level => $self->{level}->{must});
  };

  ## Starts a new ruby base after a completed annotation: the
  ## significance flag of the previous segment is moved to |has_sig|
  ## and the phase becomes |in-rb|.
  my $begin_next_base = sub {
    my ($element_state) = @_;
    if ($element_state->{has_significant}) {
      $element_state->{has_sig} = 1;
      delete $element_state->{has_significant};
    }
    $element_state->{phase} = 'in-rb';
  };

  $Element->{$HTML_NS}->{ruby} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_WD | FEATURE_RUBY_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
      lang => FEATURE_HTML5_REC,
    }),
    check_start => sub {
      my ($self, $item, $element_state) = @_;

      $element_state->{phase} = 'before-rb';
      #$element_state->{has_sig}

      $element_state->{uri_info}->{template}->{type}->{resource} = 1;
      $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    },
    ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;

      ## Excluded interactive content is an error and does not affect
      ## the phase; explicitly allowed ("plus") elements are ignored.
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
          $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{level}->{must});
        return;
      }
      return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

      my $phase = $element_state->{phase};
      my $is_rt = ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS);
      my $is_rp = ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS);

      if ($phase eq 'before-rb') {
        if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
          $element_state->{phase} = 'in-rb';
        } elsif ($is_rt or $is_rp) {
          ## Annotation with no base at all.
          $report_no_base->($self, $child_el);
          $element_state->{phase} = $is_rt ? 'after-rt' : 'after-rp1';
        } else {
          $report_bad_base->($self, $child_el);
          $element_state->{phase} = 'in-rb';
        }
      } elsif ($phase eq 'in-rb') {
        if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
          ## Still in the same base.
        } elsif ($is_rt or $is_rp) {
          $report_no_base->($self, $child_el)
              unless $element_state->{has_significant};
          $element_state->{phase} = $is_rt ? 'after-rt' : 'after-rp1';
        } else {
          $report_bad_base->($self, $child_el);
        }
      } elsif ($phase eq 'after-rt' or $phase eq 'after-rp2') {
        ## A complete annotation has been seen; a new base is expected.
        ## Both phases react identically (an |rt| always leads to
        ## |after-rt|, an |rp| to |after-rp1|).
        if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
          $begin_next_base->($element_state);
        } elsif ($is_rt or $is_rp) {
          $report_no_base->($self, $child_el);
          $element_state->{phase} = $is_rt ? 'after-rt' : 'after-rp1';
        } else {
          $report_bad_base->($self, $child_el);
          $begin_next_base->($element_state);
        }
      } elsif ($phase eq 'after-rp1') {
        ## |rt| is expected here, then |rp|.
        if ($is_rt) {
          $element_state->{phase} = 'after-rp-rt';
        } elsif ($is_rp) {
          $report_missing_before->($self, $child_el, 'rt');
          $element_state->{phase} = 'after-rp2';
        } else {
          $report_missing_before->($self, $child_el, 'rt');
          $report_missing_before->($self, $child_el, 'rp');
          $report_bad_base->($self, $child_el)
              unless $HTMLPhrasingContent->{$child_nsuri}->{$child_ln};
          $begin_next_base->($element_state);
        }
      } elsif ($phase eq 'after-rp-rt') {
        ## The closing |rp| is expected here.
        if ($is_rp) {
          $element_state->{phase} = 'after-rp2';
        } elsif ($is_rt) {
          $report_missing_before->($self, $child_el, 'rp');
          $report_no_base->($self, $child_el);
          $element_state->{phase} = 'after-rt';
        } else {
          $report_missing_before->($self, $child_el, 'rp');
          $report_bad_base->($self, $child_el)
              unless $HTMLPhrasingContent->{$child_nsuri}->{$child_ln};
          $begin_next_base->($element_state);
        }
      } else {
        die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
      }
    },
    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;

      ## Only significant text takes part in the state machine.
      return unless $has_significant;

      my $phase = $element_state->{phase};
      if ($phase eq 'in-rb') {
        ## Still in the same base.
      } elsif ($phase eq 'before-rb' or
               $phase eq 'after-rt' or
               $phase eq 'after-rp2') {
        $element_state->{phase} = 'in-rb';
      } elsif ($phase eq 'after-rp1') {
        $report_missing_before->($self, $child_node, 'rt');
        $report_missing_before->($self, $child_node, 'rp');
        $element_state->{phase} = 'in-rb';
      } elsif ($phase eq 'after-rp-rt') {
        $report_missing_before->($self, $child_node, 'rp');
        $element_state->{phase} = 'in-rb';
      } else {
        die "check_child_text: Bad |ruby| phase: $element_state->{phase}";
      }
    },
    check_end => sub {
      my ($self, $item, $element_state) = @_;
      $self->_remove_minus_elements ($element_state);

      my $ruby_el = $item->{node};
      my $phase = $element_state->{phase};
      if ($phase eq 'before-rb') {
        ## Empty |ruby|: neither a base nor an annotation.
        $self->{onerror}->(node => $ruby_el,
                           level => $self->{level}->{should},
                           type => 'no significant content');
        $report_missing_at_end->($self, $ruby_el, 'rt');
      } elsif ($phase eq 'in-rb') {
        ## A trailing base without annotation.
        unless ($element_state->{has_significant}) {
          $self->{onerror}->(node => $ruby_el,
                             level => $self->{level}->{should},
                             type => 'no significant content at the end');
        }
        $report_missing_at_end->($self, $ruby_el, 'rt');
      } elsif ($phase eq 'after-rt' or $phase eq 'after-rp2') {
        ## Ends with a complete annotation.
      } elsif ($phase eq 'after-rp1') {
        $report_missing_at_end->($self, $ruby_el, 'rt');
        $report_missing_at_end->($self, $ruby_el, 'rp');
      } elsif ($phase eq 'after-rp-rt') {
        $report_missing_at_end->($self, $ruby_el, 'rp');
      } else {
        ## The message names this callback (it used to say
        ## |check_child_text|, which pointed at the wrong place).
        die "check_end: Bad |ruby| phase: $element_state->{phase}";
      }

      ## NOTE: A modified version of |check_end| of %AnyChecker.
      ## Significance seen in any segment is propagated to the parent.
      if ($element_state->{has_significant} or $element_state->{has_sig}) {
        $item->{real_parent_state}->{has_significant} = 1;
      }
    },
  };
}
4116    
## |rt| and |rp| elements: identical definitions, but each name gets
## its own freshly built hash (they are not aliases of each other).
for my $ruby_part_ln (qw(rt rp)) {
  $Element->{$HTML_NS}->{$ruby_part_ln} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_WD | FEATURE_RUBY_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    }),
  };
} # rt, rp
4136 wakaba 1.99
4137 wakaba 1.29 =pod
4138    
4139     ## TODO:
4140    
4141     +
4142     + <p>Partly because of the confusion described above, authors are
4143     + strongly recommended to always mark up all paragraphs with the
4144     + <code>p</code> element, and to not have any <code>ins</code> or
4145     + <code>del</code> elements that cross across any <span
4146     + title="paragraph">implied paragraphs</span>.</p>
4147     +
4148     (An informative note)
4149    
4150     <p><code>ins</code> elements should not cross <span
4151     + title="paragraph">implied paragraph</span> boundaries.</p>
4152     (normative)
4153    
4154     + <p><code>del</code> elements should not cross <span
4155     + title="paragraph">implied paragraph</span> boundaries.</p>
4156     (normative)
4157    
4158     =cut
4159    
## |ins| element: transparent content model.  |cite| is checked as a
## URI and |datetime| as a global date and time string.
$Element->{$HTML_NS}->{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Classify the URI-valued attributes for later reporting.
    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);
  },
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
    datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map {; $_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC }
         qw(cite datetime)),
    lang => FEATURE_HTML5_REC,
  }),
};
4181    
## |del| element: transparent content model with the same attributes
## as |ins|.  Unlike the generic checker, |check_end| never propagates
## the significance flag to the parent.
$Element->{$HTML_NS}->{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
    datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map {; $_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC }
         qw(cite datetime)),
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Classify the URI-valued attributes for later reporting.
    $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate.
    ## An element with no significant content is reported unless it
    ## is being treated as transparent.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
};
4215    
## |figure| element: flow content with an optional |legend| which has
## to be either the first or the last child.
##
## State kept in |$element_state|:
##   has_legend_at_first  a |legend| occurred before any other content
##   has_legend           the latest |legend| seen after other content
##   has_non_legend       content has been seen since the last |legend|
##   has_table            number of |table| children
##   has_non_table        true if there is content other than |table|
##                        (|legend| is counted on neither side)
##   in_figure            set to 1 by |check_child_text|
##   table_caption_element  read in |check_end|; not set in this block
$Element->{$HTML_NS}->{figure} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  ## NOTE: legend, Flow | Flow, legend?
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $is_excluded
        = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
           $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));

    if ($is_excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
      $element_state->{has_non_table} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_table} = 1;
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| exists, so this one is redundant.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:figure legend',
                           level => $self->{level}->{must});
      } elsif ($element_state->{has_legend}) {
        ## The earlier |legend| turned out not to be the last child;
        ## report it and remember the new one as the candidate.
        $self->{onerror}->(node => $element_state->{has_legend},
                           type => 'element not allowed:figure legend',
                           level => $self->{level}->{must});
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        $element_state->{has_legend} = $child_el;
      } else {
        $element_state->{has_legend_at_first} = 1;
      }
      delete $element_state->{has_non_legend};
    } else {
      my $counter = ($child_nsuri eq $HTML_NS and $child_ln eq 'table')
          ? 'has_table' : 'has_non_table';
      $element_state->{$counter}++;
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_legend} = 1;
      $element_state->{has_non_table}++;
    }

    ## NOTE(review): |in_figure| is only set here, i.e. once a text
    ## node child has been visited.  It is not read in this block;
    ## confirm against its reader whether it should rather be set when
    ## the element starts.
    $element_state->{in_figure} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |legend| candidate followed by more content is
    ## neither the first nor the last child.
    if (not $element_state->{has_legend_at_first} and
        $element_state->{has_legend} and
        $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    }

    ## The |figure| contains exactly one |table| and nothing else
    ## counted as content: the recorded table caption element is
    ## reported at the SHOULD level.
    my $table_count = $element_state->{has_table} || 0;
    if ($table_count == 1 and
        not $element_state->{has_non_table} and
        $element_state->{table_caption_element}) {
      $self->{onerror}->(node => $element_state->{table_caption_element},
                         type => 'element not allowed',
                         level => $self->{level}->{should});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
4292 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
4293 wakaba 1.1
## Attribute checker for attributes whose checking is not implemented:
## it always reports the attribute node as an "unknown attribute" at the
## "uncertain" level and returns whatever the error handler returns.
my $AttrCheckerNotImplemented = sub {
  my $checker = shift;
  my $attr_node = shift;
  my @report = (
    node => $attr_node,
    type => 'unknown attribute',
    level => $checker->{level}->{uncertain},
  );
  $checker->{onerror}->(@report);
};
4300    
## |img| element: empty content model.
##
## |check_attrs| runs the per-attribute checkers, then reports a
## missing |alt| (SHOULD) or |src| (MUST), and finally classifies the
## URI-valued attributes in |$element_state->{uri_info}|.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_listed_attrs = $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      alt => sub { }, ## NOTE: No syntactical requirement
      ## Non-negative integers; the callback accepts every value.
      (map {; $_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }) }
           qw(border hspace height vspace width)),
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        ## |ismap| is only allowed while the |in_a_href| flag is set.
        unless ($self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{level}->{must});
        }
        my $check_boolean = $GetHTMLBooleanAttrChecker->('ismap');
        $check_boolean->($self, $attr, $parent_item);
      },
      longdesc => $HTMLURIAttrChecker,
      ## TODO: HTML4 |name|
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map {; $_ => FEATURE_M12N10_REC_DEPRECATED }
           qw(align border hspace name vspace)),
      (map {; $_ => FEATURE_HTML5_LC | FEATURE_M12N10_REC }
           qw(alt height width)),
      (map {; $_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC }
           qw(ismap src usemap)),
      lang => FEATURE_HTML5_REC,
      longdesc => FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
    });
    $check_listed_attrs->($self, $item, $element_state);

    ## Expected attributes: |alt| at the SHOULD level, then |src| at
    ## the MUST level.
    ## TODO: ... (|alt|)
    for my $expected (['alt', 'should'], ['src', 'must']) {
      my ($attr_ln, $level_key) = @$expected;
      next if $item->{node}->has_attribute_ns (undef, $attr_ln);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $attr_ln,
                         level => $self->{level}->{$level_key});
    }

    ## TODO: external resource check

    $element_state->{uri_info}->{$_}->{type}->{embedded} = 1
        for qw(src lowsrc dynsrc);
    $element_state->{uri_info}->{longdesc}->{type}->{cite} = 1;
  },
};
4369    
## |iframe| element: text content model.
##
## The HTML5 boolean attribute is spelled |seamless|.  It used to be
## registered here as |seemless|, so that the real attribute had no
## checker and no status while the misspelling was accepted.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    name => $HTMLBrowsingContextNameAttrChecker,
    sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
      'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
    }),
    seamless => $GetHTMLBooleanAttrChecker->('seamless'),
    src => $HTMLURIAttrChecker,
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sandbox => FEATURE_HTML5_WD,
    scrolling => FEATURE_M12N10_REC,
    seamless => FEATURE_HTML5_WD,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_REC,
    width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Classify the URI-valued attributes for later reporting.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
4411    
## |embed| element: empty content model.
##
## Unlike most elements, |embed| accepts any null-namespace attribute
## whose name is a lowercase, non-|xml|-prefixed NCName, so the
## attributes are walked by hand instead of going through
## |$GetHTMLAttrsChecker|.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status of the attributes known by name.  The table does not
    ## depend on the attribute being checked, so it is built once per
    ## element rather than once per attribute.
    my %status_of = (
      %HTMLAttrStatus,
      height => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_WD,
      type => FEATURE_HTML5_WD,
      width => FEATURE_HTML5_LC,
    );

    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;

      ## NOTE(review): this lookup is keyed by the local name only, so
      ## a namespaced attribute whose local name is in the table also
      ## gets this status - confirm that this is intended.
      my $status = $status_of{$attr_ln};

      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } elsif ($attr_ln eq 'width' or $attr_ln eq 'height') {
          $checker = $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 });
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } elsif ($attr_ln !~ /^[Xx][Mm][Ll]/ and
                 $attr_ln !~ /[A-Z]/ and
                 $attr_ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
          $checker = $HTMLAttrChecker->{$attr_ln}
              || sub { }; ## NOTE: Any local attribute is ok.
          $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the generic per-namespace tables; the |''| entry
      ## is the default for the namespace.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($attr_ns eq '' and not $status) {
        ## No checker and no status: nothing is reported here.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    unless ($has_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{info});
      ## NOTE: <embed> without src="" is allowed since revision 1929.
      ## We issue an informational message since <embed> w/o src=""
      ## is likely an authoring error.
    }

    ## TODO: external resource check

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4489    
4490 wakaba 1.49 ## TODO:
4491     ## {applet} FEATURE_M12N10_REC_DEPRECATED
4492     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4493    
4494 wakaba 1.1 $Element->{$HTML_NS}->{object} = {
4495 wakaba 1.40 %HTMLTransparentChecker,
4496 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4497 wakaba 1.40 check_attrs => sub {
4498     my ($self, $item, $element_state) = @_;
4499 wakaba 1.1 $GetHTMLAttrsChecker->({
4500 wakaba 1.70 align => $GetHTMLEnumeratedAttrChecker->({
4501     bottom => 1, middle => 1, top => 1, left => 1, right => 1,
4502     }),
4503     archive => $HTMLSpaceURIsAttrChecker,
4504     ## TODO: Relative to @codebase
4505     border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4506     classid => $HTMLURIAttrChecker,
4507     codebase => $HTMLURIAttrChecker,
4508     codetype => $HTMLIMTAttrChecker,
4509     ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
4510 wakaba 1.1 data => $HTMLURIAttrChecker,
4511 wakaba 1.70 declare => $GetHTMLBooleanAttrChecker->('declare'),
4512     ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
4513     ## [HTML4] but we don't know how to test this.
4514 wakaba 1.167 form => $HTMLFormAttrChecker,
4515 wakaba 1.178 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4516 wakaba 1.70 hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4517 wakaba 1.76 name => $HTMLBrowsingContextNameAttrChecker,
4518 wakaba 1.167 ## NOTE: |name| attribute of the |object| element defines
4519     ## the name of the browsing context created by the element,
4520     ## if any, but is also used as the form control name of the
4521     ## form control provided by the plugin, if any.
4522 wakaba 1.70 standby => sub {}, ## NOTE: %Text; in HTML4
4523 wakaba 1.1 type => $HTMLIMTAttrChecker,
4524     usemap => $HTMLUsemapAttrChecker,
4525 wakaba 1.70 vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4526 wakaba 1.178 width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4527 wakaba 1.49 }, {
4528     %HTMLAttrStatus,
4529 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
4530 wakaba 1.49 align => FEATURE_XHTML10_REC,
4531 wakaba 1.82 archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4532 wakaba 1.49 border => FEATURE_XHTML10_REC,
4533     classid => FEATURE_M12N10_REC,
4534     codebase => FEATURE_M12N10_REC,
4535     codetype => FEATURE_M12N10_REC,
4536 wakaba 1.82 'content-length' => FEATURE_XHTML2_ED,
4537 wakaba 1.153 data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4538 wakaba 1.49 datafld => FEATURE_HTML4_REC_RESERVED,
4539     dataformatas => FEATURE_HTML4_REC_RESERVED,
4540     datasrc => FEATURE_HTML4_REC_RESERVED,
4541 wakaba 1.82 declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4542 wakaba 1.187 form => FEATURE_HTML5_LC,
4543 wakaba 1.153 height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4544 wakaba 1.49 hspace => FEATURE_XHTML10_REC,
4545 wakaba 1.187 lang => FEATURE_HTML5_REC,
4546 wakaba 1.153 name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4547 wakaba 1.49 standby => FEATURE_M12N10_REC,
4548 wakaba 1.50 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4549 wakaba 1.153 type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4550     usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4551 wakaba 1.49 vspace => FEATURE_XHTML10_REC,
4552 wakaba 1.153 width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4553 wakaba 1.66 })->($self, $item, $element_state);
4554 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'data')) {
4555     unless ($item->{node}->has_attribute_ns (undef, 'type')) {
4556     $self->{onerror}->(node => $item->{node},
4557 wakaba 1.104 type => 'attribute missing:data|type',
4558     level => $self->{level}->{must});
4559 wakaba 1.1 }
4560     }
4561 wakaba 1.66
4562     $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
4563     $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
4564     $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
4565     ## TODO: archive
4566     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
4567 wakaba 1.1 },
4568 wakaba 1.72 ## NOTE: param*, transparent (Flow)
4569 wakaba 1.41 check_child_element => sub {
4570     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4571     $child_is_transparent, $element_state) = @_;
4572 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
4573     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
4574 wakaba 1.41 $self->{onerror}->(node => $child_el,
4575     type => 'element not allowed:minus',
4576 wakaba 1.104 level => $self->{level}->{must});
4577 wakaba 1.41 $element_state->{has_non_legend} = 1;
4578     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4579     #
4580     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
4581     if ($element_state->{has_non_param}) {
4582 wakaba 1.104 $self->{onerror}->(node => $child_el,
4583 wakaba 1.72 type => 'element not allowed:flow',
4584 wakaba 1.104 level => $self->{level}->{must});
4585 wakaba 1.39 }
4586 wakaba 1.41 } else {
4587 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
4588 wakaba 1.41 $element_state->{has_non_param} = 1;
4589 wakaba 1.39 }
4590 wakaba 1.25 },
4591 wakaba 1.41 check_child_text => sub {
4592     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
4593     if ($has_significant) {
4594     $element_state->{has_non_param} = 1;
4595     }
4596 wakaba 1.42 },
4597     check_end => sub {
4598     my ($self, $item, $element_state) = @_;
4599     if ($element_state->{has_significant}) {
4600 wakaba 1.46 $item->{real_parent_state}->{has_significant} = 1;
4601 wakaba 1.42 } elsif ($item->{node}->manakai_parent_element) {
4602     ## NOTE: Transparent.
4603     } else {
4604     $self->{onerror}->(node => $item->{node},
4605 wakaba 1.104 level => $self->{level}->{should},
4606 wakaba 1.42 type => 'no significant content');
4607     }
4608     },
4609 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
4610 wakaba 1.1 };
4611 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4612     ## What about |<section><object data><style scoped></style>x</object></section>|?
4613     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4614 wakaba 1.1
## Checker definition for the HTML |param| element (an empty element).
##
## |check_attrs| validates the element-specific attributes and then
## reports an 'attribute missing' error (MUST level) for each of the
## |name| and |value| attributes that is absent.
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Value checkers.  |name| and |value| have no-op checkers, i.e. any
    ## string is accepted for them.
    my $attr_checkers = {
      name => sub { },
      type => $HTMLIMTAttrChecker,
      value => sub { },
      valuetype => $GetHTMLEnumeratedAttrChecker->({
        data => 1, ref => 1, object => 1,
      }),
    };

    ## Standardization status of each attribute.  The element-specific
    ## entries override the defaults taken from |%HTMLAttrStatus|.
    my $attr_statuses = {
      %HTMLAttrStatus,
      href => FEATURE_RDFA_REC,
      id => FEATURE_HTML5_REC,
      name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    };

    $GetHTMLAttrsChecker->($attr_checkers, $attr_statuses)->(@_);

    ## Required attributes, reported in this order.
    for my $required (qw/name value/) {
      next if $item->{node}->has_attribute_ns (undef, $required);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $required,
                         level => $self->{level}->{must});
    }
  },
};
4650    
## Checker definition for the HTML |video| element.
##
## Element state used by the callbacks below:
##   allow_source - true while a |source| child is still acceptable:
##                  initially true only if there is no |src| attribute,
##                  and removed once any other content has been seen.
##   has_source   - |-1| if a |source| child is expected but has not been
##                  seen (yet), |1| once a |source| child has been seen.
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autobuffer => $GetHTMLBooleanAttrChecker->('autobuffer'),
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    poster => $HTMLURIAttrChecker,
    src => $HTMLURIAttrChecker,
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    autobuffer => FEATURE_HTML5_LC,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    end => FEATURE_HTML5_AT_RISK,
    height => FEATURE_HTML5_LC,
    loopend => FEATURE_HTML5_AT_RISK,
    loopstart => FEATURE_HTML5_AT_RISK,
    playcount => FEATURE_HTML5_AT_RISK,
    poster => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    start => FEATURE_HTML5_AT_RISK,
    width => FEATURE_HTML5_LC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |source| children are acceptable only when |src| is not specified.
    my $has_src = $item->{node}->has_attribute_ns (undef, 'src');
    $element_state->{allow_source} = not $has_src;

    ## |-1| marks "a |source| child is required but not seen yet".
    ## NOTE: |has_source| might already be set true by |check_element|,
    ## in which case it is left as is.
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;

    my $uri_info = $element_state->{uri_info};
    $uri_info->{src}->{type}->{embedded} = 1;
    $uri_info->{poster}->{type}->{embedded} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      ## Explicitly excluded element.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included element: nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
      ## A |source| child is an error if |src| is specified or if other
      ## content has already been seen.
      if (not $element_state->{allow_source}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
      $element_state->{has_source} = 1;
    } else {
      ## Any other child ends the run of |source| elements.
      delete $element_state->{allow_source};
      $HTMLFlowContentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text ends the run of |source| elements.
    delete $element_state->{allow_source} if $has_significant;
    $HTMLFlowContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## No |src| attribute and no |source| child has been found.
    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'source',
                         level => $self->{level}->{must});
    }

    ## The remaining end-of-element checks are shared with |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
4734    
## Checker definition for the HTML |audio| element.
##
## Every callback is inherited from the |video| definition; only the
## status and the attribute checker are replaced.  The |audio| attribute
## tables are the |video| ones without |height|, |poster| and |width|.
$Element->{$HTML_NS}->{audio} = {
  %{$Element->{$HTML_NS}->{video}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autobuffer => $GetHTMLBooleanAttrChecker->('autobuffer'),
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    autobuffer => FEATURE_HTML5_LC,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    end => FEATURE_HTML5_AT_RISK,
    loopend => FEATURE_HTML5_AT_RISK,
    loopstart => FEATURE_HTML5_AT_RISK,
    playcount => FEATURE_HTML5_AT_RISK,
    src => FEATURE_HTML5_LC,
    start => FEATURE_HTML5_AT_RISK,
  }),
};
4759    
## Checker definition for the HTML |source| element (an empty element).
##
## |check_attrs| validates the element-specific attributes, reports an
## 'attribute missing' error (MUST level) if |src| is absent, and marks
## the |src| URL as referring to an embedded resource.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checkers = {
      media => $HTMLMQAttrChecker,
      ## ISSUE: Negative or zero pixelratio=""
      pixelratio => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
    };
    my $attr_statuses = {
      %HTMLAttrStatus,
      media => FEATURE_HTML5_LC,
      pixelratio => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_LC,
      type => FEATURE_HTML5_LC,
    };
    $GetHTMLAttrsChecker->($attr_checkers, $attr_statuses)->(@_);

    ## |src| is required.
    if (not $item->{node}->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4787    
## Checker definition for the HTML |canvas| element.
##
## The content model is transparent; the only element-specific
## attributes are |height| and |width|, both non-negative integers.
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    width => FEATURE_HTML5_REC,
    height => FEATURE_HTML5_REC,
  }),

  ## Authors MUST provide alternative content (HTML5 revision 2868).
  ## That requirement is not checked here, because the alternative
  ## content might be placed outside of the element.
}; # canvas
4804 wakaba 1.1
## Checker definition for the HTML |map| element.
##
## |check_attrs| requires a non-empty |name| attribute, registers the
## name in |$self->{map}| (first occurrence wins), and requires that an
## |id| attribute, if any, has the same value as |name|.
##
## |check_start| / |check_end| maintain |$self->{flag}->{in_map}|, the
## stack of per-|map| hashes filled in by descendant |area| elements.
$Element->{$HTML_NS}->{map} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Set to |[$value, $attr_node]| by the |name| checker when the
    ## attribute is present.
    my $name_info;

    $GetHTMLAttrsChecker->({
      name => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        ## NOTE: Duplication is not non-conforming.
        ## NOTE: Space characters are not non-conforming.
        unless (length $value) {
          $self->{onerror}->(node => $attr,
                             type => 'empty attribute value',
                             level => $self->{level}->{must});
        }
        $self->{map}->{$value} ||= $attr;
        $name_info = [$value, $attr];
      },
    }, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_REC,
      id => FEATURE_HTML5_REC,
      lang => FEATURE_HTML5_REC,
      #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_REC,
    })->(@_);

    if (not $name_info) {
      ## |name| is required.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'name',
                         level => $self->{level}->{must});
    } else {
      ## If |id| is also specified, it has to be equal to |name|.
      my $id = $item->{node}->get_attribute_ns (undef, 'id');
      if (defined $id and $name_info->[0] ne $id) {
        my $id_attr = $item->{node}->get_attribute_node_ns (undef, 'id');
        $self->{onerror}->(node => $id_attr,
                           type => 'id ne name',
                           level => $self->{level}->{must});
      }
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Remember the current stack so that |check_end| can restore it,
    ## then install a copy with a fresh hash for this |map| appended.
    my $outer = $self->{flag}->{in_map};
    $element_state->{in_map_original} = $outer;
    $self->{flag}->{in_map} = [@{$self->{flag}->{in_map} or []}, {}];
    ## NOTE: |{in_map}| is a reference to an array of hash references,
    ## one hash per currently open |map| element.  Each hash maps the
    ## resource named by the |href| of a processed |area| element in
    ## that |map| (## TODO: use absolute URL) to a value:
    ##   - zero (|0|) if there IS an |area| element for the resource
    ##     whose |alt| attribute has a non-empty value;
    ##   - otherwise, a reference to the array of the empty |alt=""|
    ##     attributes of the |area| elements for the resource.

    my $uri_info = $element_state->{uri_info};
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Report every empty |alt=""| whose resource has no |area| with a
    ## non-empty |alt| in this |map|.  A false value (|0|) means that
    ## such an |area| exists, so there is nothing to report.
    for my $url (keys %{$self->{flag}->{in_map}->[-1]}) {
      my $empty_alts = $self->{flag}->{in_map}->[-1]->{$url} or next;
      for my $alt_attr (@$empty_alts) {
        $self->{onerror}->(type => 'empty area alt',
                           node => $alt_attr,
                           level => $self->{level}->{html5_no_may});
      }
    }

    ## Leave this |map|: restore the stack saved by |check_start|.
    $self->{flag}->{in_map} = $element_state->{in_map_original};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
4903    
## Checker definition for the HTML |area| element (an empty element).
##
## |check_attrs| does its own attribute dispatch, because the checks on
## |href|, |alt|, |shape| and |coords| depend on each other:
##   1. every attribute is validated by its checker and its status is
##      reported;
##   2. |alt| is required with |href|, while |alt| and the hyperlink
##      attributes are not allowed without |href|;
##   3. the number and the values of |coords| are checked against the
##      shape selected by |shape| (rectangle if not specified).
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Null-namespace attributes that have an |area|-specific checker,
    ## keyed by local name.
    my %area_attr;
    ## Array reference of the integers in |coords|; set only if the
    ## attribute value is syntactically valid.
    my $coords;

    for my $attr_node (@{$item->{node}->attributes}) {
      my $attr_ns = $attr_node->namespace_uri;
      $attr_ns = '' if not defined $attr_ns;
      my $attr_ln = $attr_node->manakai_local_name;
      my ($checker, $status);

      if ($attr_ns eq '') {
        $status = {
          %HTMLAttrStatus,
          %HTMLM12NCommonAttrStatus,
          accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
          alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
          hreflang => FEATURE_HTML5_WD,
          lang => FEATURE_HTML5_REC,
          media => FEATURE_HTML5_WD,
          nohref => FEATURE_M12N10_REC,
          onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          ping => FEATURE_HTML5_WD,
          rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
          shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
          type => FEATURE_HTML5_WD,
        }->{$attr_ln};

        $checker = {
          alt => sub {
            ## NOTE: Checked later.
          },
          shape => $GetHTMLEnumeratedAttrChecker->({
            circ => -1, circle => 1,
            default => 1,
            poly => 1, polygon => -1,
            rect => 1, rectangle => -1,
          }),
          coords => sub {
            my ($self, $attr) = @_;
            my $value = $attr->value;
            ## A comma-separated list of (possibly negative) integers.
            if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
              $coords = [split /,/, $value];
            } else {
              $self->{onerror}->(node => $attr,
                                 type => 'coords:syntax error',
                                 level => $self->{level}->{must});
            }
          },
          nohref => $GetHTMLBooleanAttrChecker->('nohref'),
          target => $HTMLTargetAttrChecker,
          href => $HTMLURIAttrChecker,
          ping => $HTMLSpaceURIsAttrChecker,
          rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
          media => $HTMLMQAttrChecker,
          hreflang => $HTMLLanguageTagAttrChecker,
          type => $HTMLIMTAttrChecker,
        }->{$attr_ln};

        if ($checker) {
          ## Remembered for the cross-attribute checks below.
          $area_attr{$attr_ln} = $attr_node;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the namespace-wide tables.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr_node, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr_node,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr_node, $status);
    }

    if (defined $area_attr{href}) {
      $self->{has_hyperlink_element} = 1;
      my $alt = $area_attr{alt};
      if (defined $alt) {
        my $url = $area_attr{href}->value; ## TODO: resolve
        if (length $alt->value) {
          ## Non-empty |alt|: the resource is covered in every open |map|.
          for my $map (@{$self->{flag}->{in_map} or []}) {
            $map->{$url} = 0;
          }
        } else {
          ## NOTE: Empty |alt=""|.  If there is another |area| element
          ## with the same |href=""| and that |area| element's |alt=""|
          ## attribute is not an empty string, then this is conforming.
          ## The attribute is recorded unless the resource is already
          ## known to be covered (value |0|).
          for my $map (@{$self->{flag}->{in_map} or []}) {
            next if exists $map->{$url} and not $map->{$url};
            push @{$map->{$url} ||= []}, $alt;
          }
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => 'alt',
                           level => $self->{level}->{must});
      }
    } else {
      ## Without |href|, none of these attributes is allowed.
      for my $ln (qw/target ping rel media hreflang type alt/) {
        my $not_allowed = $area_attr{$ln};
        next unless defined $not_allowed;
        $self->{onerror}->(node => $not_allowed,
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## Canonical shape name; unknown or missing values mean rectangle.
    my $shape = 'rectangle';
    if (defined $area_attr{shape}) {
      my %canonical_shape = (
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      );
      $shape = $canonical_shape{lc $area_attr{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    my $coords_attr = $area_attr{coords};
    if ($shape eq 'default') {
      ## The default shape takes no coordinates.
      if (defined $coords_attr) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    } elsif (not defined $coords_attr) {
      ## Circle, polygon and rectangle all require |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'coords',
                         level => $self->{level}->{must});
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      ## x, y, radius; the radius must not be negative.
      if (@$coords != 3) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number not 3',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      } elsif ($coords->[2] < 0) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:out of range',
                           index => 2,
                           value => $coords->[2],
                           level => $self->{level}->{must});
      }
    } elsif ($shape eq 'polygon') {
      ## At least three x,y pairs.
      if (@$coords < 6) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number lt 6',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      } elsif (@$coords % 2 != 0) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number not even',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      }
    } elsif ($shape eq 'rectangle') {
      ## x1, y1, x2, y2 with x1 < x2 and y1 < y2.
      if (@$coords != 4) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number not 4',
                           text => 0+@$coords,
                           level => $self->{level}->{must});
      } else {
        if (not $coords->[0] < $coords->[2]) {
          $self->{onerror}->(node => $coords_attr,
                             type => 'coords:out of range',
                             index => 0,
                             value => $coords->[0],
                             level => $self->{level}->{must});
        }
        if (not $coords->[1] < $coords->[3]) {
          $self->{onerror}->(node => $coords_attr,
                             type => 'coords:out of range',
                             index => 1,
                             value => $coords->[1],
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## An |area| element that has a parent element must be within a
    ## |map| element.
    if (not $self->{flag}->{in_map} and
        $item->{node}->manakai_parent_element) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element not allowed:area',
                         level => $self->{level}->{must});
    }

    my $uri_info = $element_state->{uri_info};
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
5151    
## Checker definition for the HTML |table| element.
##
## The order of the child elements is checked by a small state machine
## kept in |$element_state->{phase}|:
##
##   'before caption'      - nothing seen yet
##   'in colgroup'         - after |caption| and/or |colgroup|s
##   'after thead'         - after |thead|
##   'after leading tfoot' - after a |tfoot| that precedes the body
##   'in tbodys'           - within a run of |tbody| children
##   'in trs'              - within a run of |tr| children
##   'after tfoot'         - after a |tfoot| that follows the body
##
## |$element_state->{has_tfoot}| is set once any |tfoot| has been seen,
## since at most one |tfoot| child is allowed in total.
##
## A leading |tfoot| may be followed by either |tbody| elements or |tr|
## elements.  (Formerly a leading |tfoot| switched directly to the
## 'in tbodys' phase, so that a following |tr| child was wrongly
## reported as not allowed.)
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cellpadding => $HTMLLengthAttrChecker,
    cellspacing => $HTMLLengthAttrChecker,
    frame => $GetHTMLEnumeratedAttrChecker->({
      void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
      lhs => 1, rhs => 1, box => 1, border => 1,
    }),
    rules => $GetHTMLEnumeratedAttrChecker->({
      none => 1, groups => 1, rows => 1, cols => 1, all => 1,
    }),
    summary => sub {}, ## NOTE: %Text; in HTML4.
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    cols => FEATURE_RFC1942,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';

    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      my $phase = $element_state->{phase};

      ## Local name of the child if it is an HTML element; otherwise the
      ## empty string, which matches none of the names tested below.
      my $html_ln = $child_nsuri eq $HTML_NS ? $child_ln : '';

      ## Reports the child as not allowed at this position.  The phase
      ## is left unchanged in that case.
      my $not_allowed = sub {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      };

      if ($phase eq 'in tbodys') {
        if ($html_ln eq 'tbody') {
          #$element_state->{phase} = 'in tbodys';
        } elsif (not $element_state->{has_tfoot} and $html_ln eq 'tfoot') {
          $element_state->{phase} = 'after tfoot';
          $element_state->{has_tfoot} = 1;
        } else {
          $not_allowed->();
        }
      } elsif ($phase eq 'in trs') {
        if ($html_ln eq 'tr') {
          #$element_state->{phase} = 'in trs';
        } elsif (not $element_state->{has_tfoot} and $html_ln eq 'tfoot') {
          $element_state->{phase} = 'after tfoot';
          $element_state->{has_tfoot} = 1;
        } else {
          $not_allowed->();
        }
      } elsif ($phase eq 'after leading tfoot') {
        ## The body can be given in either form after a leading |tfoot|.
        ## A second |tfoot| is not allowed here.
        if ($html_ln eq 'tbody') {
          $element_state->{phase} = 'in tbodys';
        } elsif ($html_ln eq 'tr') {
          $element_state->{phase} = 'in trs';
        } else {
          $not_allowed->();
        }
      } elsif ($phase eq 'after thead') {
        if ($html_ln eq 'tbody') {
          $element_state->{phase} = 'in tbodys';
        } elsif ($html_ln eq 'tr') {
          $element_state->{phase} = 'in trs';
        } elsif ($html_ln eq 'tfoot') {
          $element_state->{phase} = 'after leading tfoot';
          $element_state->{has_tfoot} = 1;
        } else {
          $not_allowed->();
        }
      } elsif ($phase eq 'in colgroup') {
        if ($html_ln eq 'colgroup') {
          $element_state->{phase} = 'in colgroup';
        } elsif ($html_ln eq 'thead') {
          $element_state->{phase} = 'after thead';
        } elsif ($html_ln eq 'tbody') {
          $element_state->{phase} = 'in tbodys';
        } elsif ($html_ln eq 'tr') {
          $element_state->{phase} = 'in trs';
        } elsif ($html_ln eq 'tfoot') {
          $element_state->{phase} = 'after leading tfoot';
          $element_state->{has_tfoot} = 1;
        } else {
          $not_allowed->();
        }
      } elsif ($phase eq 'before caption') {
        if ($html_ln eq 'caption') {
          ## The |caption| element is recorded in the state of the
          ## parent of the |table|.
          $item->{parent_state}->{table_caption_element} = $child_el;
          $element_state->{phase} = 'in colgroup';
        } elsif ($html_ln eq 'colgroup') {
          $element_state->{phase} = 'in colgroup';
        } elsif ($html_ln eq 'thead') {
          $element_state->{phase} = 'after thead';
        } elsif ($html_ln eq 'tbody') {
          $element_state->{phase} = 'in tbodys';
        } elsif ($html_ln eq 'tr') {
          $element_state->{phase} = 'in trs';
        } elsif ($html_ln eq 'tfoot') {
          $element_state->{phase} = 'after leading tfoot';
          $element_state->{has_tfoot} = 1;
        } else {
          $not_allowed->();
        }
      } elsif ($phase eq 'after tfoot') {
        ## Nothing is allowed after the trailing |tfoot|.
        $not_allowed->();
      } else {
        die "check_child_element: Bad |table| phase: $element_state->{phase}";
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only insignificant text is allowed as a child of |table|.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    my $table = Whatpm::HTMLTable->form_table ($item->{node}, sub {
      $self->{onerror}->(@_);
    }, $self->{level});
    Whatpm::HTMLTable->assign_header
        ($table, $self->{onerror}, $self->{level});
    ## The formed table is returned to the caller of the checker.
    push @{$self->{return}->{table}}, $table;

    $HTMLChecker{check_end}->(@_);
  },
};
5302    
## |caption| element: uses the flow content checker, and registers
## |table| in the set of excluded ("minus") elements while the element
## is being checked.
$Element->{$HTML_NS}->{caption} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      align => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw(top bottom left right)
      }),
    },
    { ## Attribute statuses; explicit keys override the shared sets.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_start => sub {
    my ($checker, undef, $state) = @_;
    my $excluded = {$HTML_NS => {table => 1}};
    $checker->_add_minus_elements ($state, $excluded);

    ## Delegate to the flow content checker with the original arguments.
    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($checker, undef, $state) = @_;
    ## Undo the exclusion registered by |check_start|.
    $checker->_remove_minus_elements ($state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # caption
5329 wakaba 1.1
## Attribute checkers for cell alignment attributes.  The hash is
## spread into the |check_attrs| tables of several table elements.
my %cellalign = (
  ## HTML4 %cellhalign;
  align => $GetHTMLEnumeratedAttrChecker->({
    map { ($_ => 1) } qw(left center right justify char)
  }),
  char => sub {
    my ($checker, $char_attr) = @_;

    ## NOTE: "character" or |%Character;| in HTML4.

    ## A value whose length is not exactly one is reported.
    unless (length ($char_attr->value) == 1) {
      $checker->{onerror}->(node => $char_attr,
                            type => 'char:syntax error',
                            level => $checker->{level}->{html4_fact});
    }
  },
  charoff => $HTMLLengthAttrChecker,

  ## HTML4 %cellvalign;
  valign => $GetHTMLEnumeratedAttrChecker->({
    map { ($_ => 1) } qw(top middle bottom baseline)
  }),
);
5353    
## |colgroup| element: only |col| children (or elements in the current
## "plus" set) are accepted; significant text is rejected.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      %cellalign,
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
    },
    { ## Attribute statuses; explicit keys override the shared sets.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($checker, $parent, $el, $ns, $ln, $transparent, $state) = @_;

    ## Excluded: named in the "minus" set and accepted by
    ## |$IsInHTMLInteractiveContent|.
    my $excluded = $checker->{minus_elements}->{$ns}->{$ln}
        && $IsInHTMLInteractiveContent->($el, $ns, $ln);

    if ($excluded) {
      $checker->{onerror}->(node => $el,
                            type => 'element not allowed:minus',
                            level => $checker->{level}->{must});
    } elsif (not ($checker->{plus_elements}->{$ns}->{$ln} or
                  ($ns eq $HTML_NS and $ln eq 'col'))) {
      ## Neither in the "plus" set nor an HTML |col| element.
      $checker->{onerror}->(node => $el, type => 'element not allowed',
                            level => $checker->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($checker, $parent, $text_node, $is_significant, $state) = @_;
    $checker->{onerror}->(node => $text_node,
                          type => 'character not allowed',
                          level => $checker->{level}->{must})
        if $is_significant;
  },
};
5400    
## |col| element: empty content model; |span| must be a non-negative
## integer that also satisfies the "greater than zero" predicate.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      %cellalign,
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    },
    { ## Attribute statuses; explicit keys override the shared sets.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
};
5419    
## |tbody| element: only |tr| children (or elements in the current
## "plus" set) are accepted; significant text is rejected.  This
## definition is copied for |thead| and |tfoot|.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      %cellalign,
    },
    { ## Attribute statuses; explicit keys override the shared sets.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      valign => FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($checker, $parent, $el, $ns, $ln, $transparent, $state) = @_;

    ## Excluded: named in the "minus" set and accepted by
    ## |$IsInHTMLInteractiveContent|.
    my $excluded = $checker->{minus_elements}->{$ns}->{$ln}
        && $IsInHTMLInteractiveContent->($el, $ns, $ln);

    if ($excluded) {
      $checker->{onerror}->(node => $el,
                            type => 'element not allowed:minus',
                            level => $checker->{level}->{must});
    } elsif (not ($checker->{plus_elements}->{$ns}->{$ln} or
                  ($ns eq $HTML_NS and $ln eq 'tr'))) {
      ## Neither in the "plus" set nor an HTML |tr| element.
      $checker->{onerror}->(node => $el, type => 'element not allowed',
                            level => $checker->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($checker, $parent, $text_node, $is_significant, $state) = @_;
    $checker->{onerror}->(node => $text_node,
                          type => 'character not allowed',
                          level => $checker->{level}->{must})
        if $is_significant;
  },
};
5459    
## |thead| element: a shallow copy of the |tbody| definition.
$Element->{$HTML_NS}->{thead} = do {
  my $tbody_def = $Element->{$HTML_NS}->{tbody};
  +{ %$tbody_def };
};
5463    
## |tfoot| element: a shallow copy of the |tbody| definition.
$Element->{$HTML_NS}->{tfoot} = do {
  my $tbody_def = $Element->{$HTML_NS}->{tbody};
  +{ %$tbody_def };
};
5467    
## |tr| element: only |td| and |th| children (or elements in the
## current "plus" set) are accepted; significant text is rejected.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      %cellalign,
      bgcolor => $HTMLColorAttrChecker,
    },
    { ## Attribute statuses; explicit keys override the shared sets.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      valign => FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($checker, $parent, $el, $ns, $ln, $transparent, $state) = @_;

    ## Excluded: named in the "minus" set and accepted by
    ## |$IsInHTMLInteractiveContent|.
    my $excluded = $checker->{minus_elements}->{$ns}->{$ln}
        && $IsInHTMLInteractiveContent->($el, $ns, $ln);

    if ($excluded) {
      $checker->{onerror}->(node => $el,
                            type => 'element not allowed:minus',
                            level => $checker->{level}->{must});
    } elsif (not ($checker->{plus_elements}->{$ns}->{$ln} or
                  ($ns eq $HTML_NS and
                   ($ln eq 'td' or $ln eq 'th')))) {
      ## Neither in the "plus" set nor an HTML cell element.
      $checker->{onerror}->(node => $el, type => 'element not allowed',
                            level => $checker->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($checker, $parent, $text_node, $is_significant, $state) = @_;
    $checker->{onerror}->(node => $text_node,
                          type => 'character not allowed',
                          level => $checker->{level}->{must})
        if $is_significant;
  },
};
5510    
## |td| element: flow content.  |colspan| and |rowspan| must be
## non-negative integers that also satisfy the "greater than zero"
## predicate.
$Element->{$HTML_NS}->{td} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      %cellalign,
      abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
      axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
      bgcolor => $HTMLColorAttrChecker,
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      headers => sub {
        ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
        ## Though that method does not check the |headers| attribute of a
        ## |td| element if the element does not form a table, in that case
        ## the |td| element is non-conforming anyway.
      },
      nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw(row col rowgroup colgroup)
      }),
    },
    { ## Attribute statuses; explicit keys override the shared sets.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      abbr => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC,
      axis => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      headers => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
      nowrap => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      scope => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
5550    
## |th| element: phrasing content.  |colspan| and |rowspan| must be
## non-negative integers that also satisfy the "greater than zero"
## predicate.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      %cellalign,
      abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
      axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
      bgcolor => $HTMLColorAttrChecker,
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## TODO: HTML4(?) |headers|
      nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw(row col rowgroup colgroup)
      }),
    },
    { ## Attribute statuses; explicit keys override the shared sets.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      abbr => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC,
      axis => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      headers => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
      nowrap => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      scope => FEATURE_HTML5_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
5585    
## |form| element: flow content with |form| itself added to the set of
## excluded ("minus") elements while the element is being checked.
$Element->{$HTML_NS}->{form} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      accept => $AcceptAttrChecker,
      'accept-charset' => $HTMLCharsetsAttrChecker,
      action => $HTMLURIAttrChecker, ## TODO: Warn if submission is not defined for the scheme
      autocomplete => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw(on off)
      }),
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      enctype => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } (
          'application/x-www-form-urlencoded',
          'multipart/form-data',
          'text/plain',
        )
      }),
      method => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw(get post put delete)
      }),
      name => sub {
        my ($checker, $name_attr) = @_;

        ## An empty name is an error; a non-empty name must not have
        ## been recorded in |$checker->{form}| already.
        my $form_name = $name_attr->value;
        if ($form_name eq '') {
          $checker->{onerror}->(type => 'empty form name',
                                node => $name_attr,
                                level => $checker->{level}->{must});
        } elsif ($checker->{form}->{$form_name}) {
          $checker->{onerror}->(type => 'duplicate form name',
                                node => $name_attr,
                                value => $form_name,
                                level => $checker->{level}->{must});
        } else {
          $checker->{form}->{$form_name} = 1;
        }
      },
      novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
      ## TODO: Tests for following attrs:
      onformchange => $HTMLEventHandlerAttrChecker,
      onforminput => $HTMLEventHandlerAttrChecker,
      onreceived => $HTMLEventHandlerAttrChecker,
      replace => $GetHTMLEnumeratedAttrChecker->({
        map { ($_ => 1) } qw(document values)
      }),
      target => $HTMLTargetAttrChecker,
    },
    { ## Attribute statuses; explicit keys override the shared sets.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
      'accept-charset' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      autocomplete => FEATURE_HTML5_WD,
      data => FEATURE_WF2,
      enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      #name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      novalidate => FEATURE_HTML5_DEFAULT,
      onformchange => FEATURE_WF2_INFORMATIVE,
      onforminput => FEATURE_WF2_INFORMATIVE,
      onreceived => FEATURE_WF2,
      onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      replace => FEATURE_WF2,
      sdapref => FEATURE_HTML20_RFC,
      sdasuff => FEATURE_HTML20_RFC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my ($checker, undef, $state) = @_;
    $checker->_add_minus_elements ($state, {$HTML_NS => {form => 1}});

    ## Record the URI type of each URI-valued attribute.
    my $uri_info = ($state->{uri_info} ||= {});
    $uri_info->{action}->{type}->{action} = 1;
    $uri_info->{$_}->{type}->{resource} = 1 for qw(data template ref);
    $state->{id_type} = 'form';
  },
  check_end => sub {
    my ($checker, undef, $state) = @_;
    ## Undo the exclusion registered by |check_start|.
    $checker->_remove_minus_elements ($state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # form
5672 wakaba 1.52
## |fieldset| element: a |legend| child followed by flow content.  The
## element state flags |has_legend| and |has_non_legend| track what has
## been seen so far.
$Element->{$HTML_NS}->{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## Attribute value checkers
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      form => $HTMLFormAttrChecker,
      name => $FormControlNameAttrChecker,
    },
    { ## Attribute statuses; explicit keys override the shared sets.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_HTML5_WD | FEATURE_WF2X,
      form => FEATURE_HTML5_LC | FEATURE_WF2X,
      lang => FEATURE_HTML5_REC,
      name => FEATURE_HTML5_LC,
    },
  ),
  ## NOTE: legend, Flow
  check_child_element => sub {
    my ($checker, $parent, $el, $ns, $ln, $transparent, $state) = @_;

    ## Excluded: named in the "minus" set and accepted by
    ## |$IsInHTMLInteractiveContent|.
    my $excluded = $checker->{minus_elements}->{$ns}->{$ln}
        && $IsInHTMLInteractiveContent->($el, $ns, $ln);

    if ($excluded) {
      $checker->{onerror}->(node => $el,
                            type => 'element not allowed:minus',
                            level => $checker->{level}->{must});
      $state->{has_non_legend} = 1;
    } elsif ($checker->{plus_elements}->{$ns}->{$ln}) {
      ## In the "plus" set: accepted, and no flag is changed.
    } elsif ($ns eq $HTML_NS and $ln eq 'legend') {
      ## A |legend| after any other content (including an earlier
      ## |legend|) is reported.
      $checker->{onerror}->(node => $el,
                            type => 'element not allowed:details legend',
                            level => $checker->{level}->{must})
          if $state->{has_non_legend};
      $state->{has_legend} = 1;
      $state->{has_non_legend} = 1;
    } else {
      ## Everything else goes through the flow content checker, with
      ## the original argument list.
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $state->{has_non_legend} = 1 unless $transparent;
      ## TODO:
      ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
      ## should be an error, since |object| is allowed as flow,
      ## therefore |details| part of the content model does not match.
    }
  },
  check_child_text => sub {
    my ($checker, $parent, $text_node, $is_significant, $state) = @_;
    ## Significant text counts as non-legend content.
    $state->{has_non_legend} = 1 if $is_significant;
  },
  check_end => sub {
    my ($checker, $parent, $state) = @_;

    ## A |legend| child must have been seen.
    $checker->{onerror}->(node => $parent->{node},
                          type => 'child element missing',
                          text => 'legend',
                          level => $checker->{level}->{must})
        unless $state->{has_legend};

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
  ## NOTE: This definition is partially reused by |details| element's
  ## checker.
};
5739    
5740     $Element->{$HTML_NS}->{input} = {
5741 wakaba 1.119 %HTMLEmptyChecker,
5742 wakaba 1.187 status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
5743 wakaba 1.140 check_attrs => sub {
5744     my ($self, $item, $element_state) = @_;
5745 wakaba 1.142
5746 wakaba 1.145 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5747 wakaba 1.142 $state = 'text' unless defined $state;
5748     $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5749    
5750 wakaba 1.140 for my $attr (@{$item->{node}->attributes}) {
5751     my $attr_ns = $attr->namespace_uri;
5752     $attr_ns = '' unless defined $attr_ns;
5753     my $attr_ln = $attr->manakai_local_name;
5754     my $checker;
5755     my $status;
5756     if ($attr_ns eq '') {
5757     $status =
5758     {
5759     %HTMLAttrStatus,
5760     %HTMLM12NCommonAttrStatus,
5761     accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5762     'accept-charset' => FEATURE_HTML2X_RFC,
5763 wakaba 1.176 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
5764 wakaba 1.140 action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5765     align => FEATURE_M12N10_REC_DEPRECATED,
5766     alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5767 wakaba 1.185 autocomplete => FEATURE_HTML5_LC | FEATURE_WF2X,
5768 wakaba 1.187 autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
5769     checked => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
5770 wakaba 1.140 datafld => FEATURE_HTML4_REC_RESERVED,
5771     dataformatas => FEATURE_HTML4_REC_RESERVED,
5772     datasrc => FEATURE_HTML4_REC_RESERVED,
5773 wakaba 1.187 disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5774 wakaba 1.140 enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5775 wakaba 1.187 form => FEATURE_HTML5_LC | FEATURE_WF2X,
5776 wakaba 1.178 height => FEATURE_HTML5_LC,
5777 wakaba 1.150 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5778     FEATURE_XHTMLBASIC11_CR,
5779 wakaba 1.140 ismap => FEATURE_M12N10_REC,
5780 wakaba 1.187 lang => FEATURE_HTML5_REC,
5781     list => FEATURE_HTML5_LC | FEATURE_WF2X,
5782     max => FEATURE_HTML5_LC | FEATURE_WF2X,
5783     maxlength => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5784 wakaba 1.140 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5785 wakaba 1.187 min => FEATURE_HTML5_LC | FEATURE_WF2X,
5786     multiple => FEATURE_HTML5_LC,
5787     name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
5788 wakaba 1.161 novalidate => FEATURE_HTML5_DEFAULT,
5789 wakaba 1.140 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5790     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5791     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5792     onformchange => FEATURE_WF2_INFORMATIVE,
5793     onforminput => FEATURE_WF2_INFORMATIVE,
5794     oninput => FEATURE_WF2,
5795     oninvalid => FEATURE_WF2,
5796     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5797 wakaba 1.187 pattern => FEATURE_HTML5_LC | FEATURE_WF2X,
5798     placeholder => FEATURE_HTML5_LC,
5799     readonly => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5800 wakaba 1.140 replace => FEATURE_WF2,
5801 wakaba 1.187 required => FEATURE_HTML5_LC | FEATURE_WF2X,
5802 wakaba 1.140 sdapref => FEATURE_HTML20_RFC,
5803 wakaba 1.187 size => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5804 wakaba 1.140 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5805 wakaba 1.187 step => FEATURE_HTML5_LC | FEATURE_WF2X,
5806 wakaba 1.140 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5807     target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5808 wakaba 1.161 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO:dropped
5809 wakaba 1.187 type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
5810 wakaba 1.140 usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
5811 wakaba 1.187 value => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
5812 wakaba 1.178 width => FEATURE_HTML5_LC,
5813 wakaba 1.140 }->{$attr_ln};
5814    
5815     $checker =
5816     {
5817 wakaba 1.141 ## NOTE: Value of an empty string means that the attribute is only
5818     ## applicable for a specific set of states.
5819 wakaba 1.142 accept => '',
5820 wakaba 1.149 'accept-charset' => $HTMLCharsetsAttrChecker,
5821     ## NOTE: To which states it applies is not defined in RFC 2070.
5822 wakaba 1.142 action => '',
5823 wakaba 1.150 align => '',
5824 wakaba 1.141 alt => '',
5825 wakaba 1.142 autocomplete => '',
5826 wakaba 1.165 autofocus => $AutofocusAttrChecker,
5827     ## NOTE: <input type=hidden disabled> is not disallowed.
5828 wakaba 1.142 checked => '',
5829     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
5830 wakaba 1.165 ## NOTE: <input type=hidden disabled> is not disallowed.
5831 wakaba 1.142 enctype => '',
5832     form => $HTMLFormAttrChecker,
5833 wakaba 1.178 height => '',
5834 wakaba 1.150 inputmode => '',
5835     ismap => '', ## NOTE: "MUST" be type=image [HTML4]
5836 wakaba 1.142 list => '',
5837     max => '',
5838     maxlength => '',
5839     method => '',
5840     min => '',
5841 wakaba 1.156 multiple => '',
5842 wakaba 1.165 name => $FormControlNameAttrChecker,
5843 wakaba 1.166 novalidate => '',
5844 wakaba 1.149 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
5845     onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
5846     oninput => $HTMLEventHandlerAttrChecker, # [WF2]
5847     oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
5848     ## TODO: tests for four attributes above
5849 wakaba 1.142 pattern => '',
5850 wakaba 1.156 placeholder => '',
5851 wakaba 1.142 readonly => '',
5852 wakaba 1.150 replace => '',
5853 wakaba 1.142 required => '',
5854     size => '',
5855     src => '',
5856     step => '',
5857     target => '',
5858 wakaba 1.140 type => $GetHTMLEnumeratedAttrChecker->({
5859 wakaba 1.156 hidden => 1, text => 1, search => 1, url => 1,
5860 wakaba 1.193 tel => 1, email => 1, password => 1,
5861 wakaba 1.141 datetime => 1, date => 1, month => 1, week => 1, time => 1,
5862 wakaba 1.157 'datetime-local' => 1, number => 1, range => 1, color => 1,
5863     checkbox => 1,
5864 wakaba 1.141 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
5865     button => 1,
5866 wakaba 1.140 }),
5867 wakaba 1.151 usemap => '',
5868 wakaba 1.142 value => '',
5869 wakaba 1.178 width => '',
5870 wakaba 1.140 }->{$attr_ln};
5871 wakaba 1.141
5872     ## State-dependent checkers
5873     unless ($checker) {
5874     if ($state eq 'hidden') {
5875     $checker =
5876     {
5877 wakaba 1.142 value => sub {
5878     my ($self, $attr, $item, $element_state) = @_;
5879 wakaba 1.145 my $name = $item->{node}->get_attribute_ns (undef, 'name');
5880 wakaba 1.142 if (defined $name and $name eq '_charset_') { ## case-sensitive
5881     $self->{onerror}->(node => $attr,
5882     type => '_charset_ value',
5883     level => $self->{level}->{must});
5884     }
5885     },
5886 wakaba 1.141 }->{$attr_ln} || $checker;
5887 wakaba 1.142 ## TODO: Warn if no name attribute?
5888     ## TODO: Warn if name!=_charset_ and no value attribute?
5889 wakaba 1.168 } elsif ({
5890     datetime => 1, date => 1, month => 1, time => 1,
5891     week => 1, 'datetime-local' => 1,
5892     }->{$state}) {
5893     my $v = {
5894     datetime => ['global_date_and_time_string'],
5895     date => ['date_string'],
5896     month => ['month_string'],
5897     week => ['week_string'],
5898     time => ['time_string'],
5899     'datetime-local' => ['local_date_and_time_string'],
5900     }->{$state};
5901 wakaba 1.144 $checker =
5902     {
5903 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5904     on => 1, off => 1,
5905     }),
5906 wakaba 1.158 list => $ListAttrChecker,
5907 wakaba 1.168 min => $GetDateTimeAttrChecker->($v->[0]),
5908     max => $GetDateTimeAttrChecker->($v->[0]),
5909 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5910 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5911 wakaba 1.148 step => $StepAttrChecker,
5912 wakaba 1.168 value => $GetDateTimeAttrChecker->($v->[0]),
5913 wakaba 1.144 }->{$attr_ln} || $checker;
5914     } elsif ($state eq 'number') {
5915     $checker =
5916     {
5917 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5918     on => 1, off => 1,
5919     }),
5920 wakaba 1.158 list => $ListAttrChecker,
5921 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5922     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5923 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5924 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5925 wakaba 1.148 step => $StepAttrChecker,
5926 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5927 wakaba 1.144 }->{$attr_ln} || $checker;
5928     } elsif ($state eq 'range') {
5929     $checker =
5930     {
5931 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5932     on => 1, off => 1,
5933     }),
5934 wakaba 1.158 list => $ListAttrChecker,
5935 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5936     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5937 wakaba 1.148 step => $StepAttrChecker,
5938 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5939 wakaba 1.144 }->{$attr_ln} || $checker;
5940 wakaba 1.157 } elsif ($state eq 'color') {
5941     $checker =
5942     {
5943     autocomplete => $GetHTMLEnumeratedAttrChecker->({
5944     on => 1, off => 1,
5945     }),
5946 wakaba 1.158 list => $ListAttrChecker,
5947 wakaba 1.157 value => sub {
5948     my ($self, $attr) = @_;
5949     unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
5950     $self->{onerror}->(node => $attr,
5951     type => 'scolor:syntax error', ## TODOC: type
5952     level => $self->{level}->{must});
5953     }
5954     },
5955     }->{$attr_ln} || $checker;
5956 wakaba 1.144 } elsif ($state eq 'checkbox' or $state eq 'radio') {
5957     $checker =
5958     {
5959 wakaba 1.149 checked => $GetHTMLBooleanAttrChecker->('checked'),
5960     ## TODO: tests
5961 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5962 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5963     }->{$attr_ln} || $checker;
5964     ## TODO: There MUST be another input type=radio with same
5965     ## name (Radio state).
5966     ## ISSUE: There should be exactly one type=radio with checked?
5967     } elsif ($state eq 'file') {
5968     $checker =
5969     {
5970 wakaba 1.161 accept => $AcceptAttrChecker,
5971 wakaba 1.168 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
5972 wakaba 1.159 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
5973 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5974 wakaba 1.144 }->{$attr_ln} || $checker;
5975     } elsif ($state eq 'submit') {
5976     $checker =
5977     {
5978 wakaba 1.149 action => $HTMLURIAttrChecker,
5979 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5980     'application/x-www-form-urlencoded' => 1,
5981     'multipart/form-data' => 1,
5982     'text/plain' => 1,
5983     }),
5984 wakaba 1.149 method => $GetHTMLEnumeratedAttrChecker->({
5985     get => 1, post => 1, put => 1, delete => 1,
5986     }),
5987 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5988 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5989     document => 1, values => 1,
5990     }),
5991     target => $HTMLTargetAttrChecker,
5992 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5993     }->{$attr_ln} || $checker;
5994     } elsif ($state eq 'image') {
5995     $checker =
5996     {
5997 wakaba 1.149 action => $HTMLURIAttrChecker,
5998     align => $GetHTMLEnumeratedAttrChecker->({
5999     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
6000     }),
6001 wakaba 1.144 alt => sub {
6002     my ($self, $attr) = @_;
6003     my $value = $attr->value;
6004     unless (length $value) {
6005     $self->{onerror}->(node => $attr,
6006     type => 'empty anchor image alt',
6007     level => $self->{level}->{must});
6008     }
6009     },
6010 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
6011     'application/x-www-form-urlencoded' => 1,
6012     'multipart/form-data' => 1,
6013     'text/plain' => 1,
6014     }),
6015 wakaba 1.178 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
6016 wakaba 1.149 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
6017     method => $GetHTMLEnumeratedAttrChecker->({
6018     get => 1, post => 1, put => 1, delete => 1,
6019     }),
6020 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
6021 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
6022     document => 1, values => 1,
6023     }),
6024 wakaba 1.144 src => $HTMLURIAttrChecker,
6025     ## TODO: There is requirements on the referenced resource.
6026 wakaba 1.149 target => $HTMLTargetAttrChecker,
6027     usemap => $HTMLUsemapAttrChecker,
6028 wakaba 1.178 width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
6029 wakaba 1.144 }->{$attr_ln} || $checker;
6030     ## TODO: alt & src are required.
6031     } elsif ({
6032     reset => 1, button => 1,
6033     ## NOTE: From Web Forms 2.0:
6034     remove => 1, 'move-up' => 1, 'move-down' => 1,
6035     add => 1,
6036     }->{$state}) {
6037     $checker =
6038     {
6039     ## NOTE: According to Web Forms 2.0, |input| attribute
6040     ## has |template| attribute to support the |add| button
6041     ## type (as part of the repetition template feature). It
6042     ## conflicts with the |template| global attribute
6043     ## introduced as part of the data template feature.
6044     ## NOTE: |template| attribute as defined in Web Forms 2.0
6045     ## has no author requirement.
6046     value => sub { }, ## NOTE: No restriction.
6047     }->{$attr_ln} || $checker;
6048 wakaba 1.193 } else { # Text, Search, E-mail, URL, Telephone, Password
6049 wakaba 1.141 $checker =
6050     {
6051 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
6052     on => 1, off => 1,
6053     }),
6054 wakaba 1.149 ## TODO: inputmode [WF2]
6055 wakaba 1.158 list => $ListAttrChecker,
6056 wakaba 1.147 maxlength => sub {
6057     my ($self, $attr, $item, $element_state) = @_;
6058    
6059     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
6060    
6061 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
6062 wakaba 1.147 ## NOTE: Applying the rules for parsing non-negative
6063     ## integers results in a number.
6064     my $max_allowed_value_length = 0+$1;
6065    
6066     my $value = $item->{node}->get_attribute_ns (undef, 'value');
6067     if (defined $value) {
6068     my $codepoint_length = length $value;
6069 wakaba 1.162
6070 wakaba 1.147 if ($codepoint_length > $max_allowed_value_length) {
6071     $self->{onerror}
6072     ->(node => $item->{node}
6073     ->get_attribute_node_ns (undef, 'value'),
6074     type => 'value too long',
6075     level => $self->{level}->{must});
6076     }
6077     }
6078     }
6079     },
6080 wakaba 1.160 pattern => $PatternAttrChecker,
6081 wakaba 1.179 placeholder => $PlaceholderAttrChecker,
6082 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6083 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
6084 wakaba 1.147 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
value => sub {
  ## Checker for the |value| attribute of the |input| states handled
  ## by this branch.  What is validated depends on the enclosing
  ## |$state|:
  ##   - |url|:   delegated to |$HTMLURIAttrChecker|;
  ##   - |email|: a single address, or a comma-separated list of
  ##              addresses if the |multiple| attribute is specified;
  ##   - others:  the value must not contain CR or LF.
  ## Arguments: ($self, $attr, $item, $element_state), as for any
  ## other attribute checker invoked from |check_attrs|.
  my ($self, $attr, $item, $element_state) = @_;
  if ($state eq 'url') {
    $HTMLURIAttrChecker->(@_);
  } elsif ($state eq 'email') {
    if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
      ## The negative limit makes |split| keep trailing empty fields,
      ## such that they are checked (and rejected) like any other
      ## token.  An empty attribute value yields no field at all, so
      ## that it is checked as a single empty token.
      my @addr = split /,/, $attr->value, -1;
      @addr = ('') unless @addr;
      for (@addr) {
        ## Strip *all* leading and trailing space characters around
        ## the token.  The trailing pattern needs the |+| quantifier
        ## just as the leading one does; without it only one trailing
        ## space character is removed and |a@example.com  | (two
        ## spaces) is wrongly reported as a syntax error.
        s/\A[\x09\x0A\x0C\x0D\x20]+//;
        s/[\x09\x0A\x0C\x0D\x20]+\z//;
        unless (/\A$ValidEmailAddress\z/) {
          $self->{onerror}->(node => $attr,
                             type => 'email:syntax error', ## TODO: type
                             value => $_,
                             level => $self->{level}->{must});
        }
      }
    } else {
      ## Single address: no whitespace stripping is applied.
      unless ($attr->value =~ /\A$ValidEmailAddress\z/) {
        $self->{onerror}->(node => $attr,
                           type => 'email:syntax error', ## TODO: type
                           level => $self->{level}->{must});
      }
    }
  } else {
    ## Remaining states of this branch: line breaks are not allowed.
    if ($attr->value =~ /[\x0D\x0A]/) {
      $self->{onerror}->(node => $attr,
                         type => 'newline in value', ## TODO: type
                         level => $self->{level}->{must});
    }
  }
},
6118 wakaba 1.141 }->{$attr_ln} || $checker;
6119 wakaba 1.147 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
6120 wakaba 1.156 $checker = $GetHTMLBooleanAttrChecker->('multiple')
6121     if $state eq 'email' and $attr_ln eq 'multiple';
6122 wakaba 1.161
6123     if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6124     not $item->{node}->has_attribute_ns (undef, 'title')) {
6125     $self->{onerror}->(node => $item->{node},
6126     type => 'attribute missing',
6127     text => 'title',
6128     level => $self->{level}->{should});
6129     }
6130 wakaba 1.141 }
6131     }
6132    
6133     if (defined $checker) {
6134     if ($checker eq '') {
6135     $checker = sub {
6136     my ($self, $attr) = @_;
6137     $self->{onerror}->(node => $attr,
6138     type => 'input attr not applicable',
6139     text => $state,
6140     level => $self->{level}->{must});
6141     };
6142     }
6143 wakaba 1.140 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
6144     $attr_ln !~ /[A-Z]/) {
6145     $checker = $HTMLDatasetAttrChecker;
6146     $status = $HTMLDatasetAttrStatus;
6147     } else {
6148     $checker = $HTMLAttrChecker->{$attr_ln};
6149     }
6150     }
6151     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
6152     || $AttrChecker->{$attr_ns}->{''};
6153     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
6154     || $AttrStatus->{$attr_ns}->{''};
6155     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
6156 wakaba 1.157
6157 wakaba 1.140 if ($checker) {
6158     $checker->($self, $attr, $item, $element_state) if ref $checker;
6159     } elsif ($attr_ns eq '' and not $status) {
6160     #
6161     } else {
6162     $self->{onerror}->(node => $attr,
6163     type => 'unknown attribute',
6164     level => $self->{level}->{uncertain});
6165     ## ISSUE: No conformance criteria for unknown attributes in the spec
6166     }
6167    
6168     $self->_attr_status_info ($attr, $status);
6169     }
6170 wakaba 1.168
6171     ## ISSUE: -0/+0
6172    
6173     if ($state eq 'range') {
6174     $element_state->{number_value}->{min} ||= 0;
6175     $element_state->{number_value}->{max} = 100
6176     unless defined $element_state->{number_value}->{max};
6177     }
6178    
6179     if (defined $element_state->{date_value}->{min} or
6180     defined $element_state->{date_value}->{max}) {
6181     my $min_value = $element_state->{date_value}->{min};
6182     my $max_value = $element_state->{date_value}->{max};
6183     my $value_value = $element_state->{date_value}->{value};
6184    
6185     if (defined $min_value and $min_value eq '' and
6186     (defined $max_value or defined $value_value)) {
6187     my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6188     $self->{onerror}->(node => $min,
6189     type => 'date value not supported', ## TODOC: type
6190     value => $min->value,
6191     level => $self->{level}->{unsupported});
6192     undef $min_value;
6193     }
## An empty-string |max| entry is reported as a date value that is
## not supported, and is then discarded such that the comparisons
## below skip it.  As with the sibling checks for |min| and |value|,
## this is only done when there is another value to compare with.
## The guard has to test |$min_value| here: testing |$max_value|
## again (as a copy of the |min| check did) is always true and makes
## the condition meaningless.
if (defined $max_value and $max_value eq '' and
    (defined $min_value or defined $value_value)) {
  my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
  $self->{onerror}->(node => $max,
                     type => 'date value not supported', ## TODOC: type
                     value => $max->value,
                     level => $self->{level}->{unsupported});
  undef $max_value;
}
6203     if (defined $value_value and $value_value eq '' and
6204     (defined $max_value or defined $min_value)) {
6205     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6206     $self->{onerror}->(node => $value,
6207     type => 'date value not supported', ## TODOC: type
6208     value => $value->value,
6209     level => $self->{level}->{unsupported});
6210     undef $value_value;
6211     }
6212    
6213     if (defined $min_value and defined $max_value) {
6214     if ($min_value->to_html5_number > $max_value->to_html5_number) {
6215     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6216     $self->{onerror}->(node => $max,
6217     type => 'max lt min', ## TODOC: type
6218     level => $self->{level}->{must});
6219     }
6220     }
6221    
6222     if (defined $min_value and defined $value_value) {
6223     if ($min_value->to_html5_number > $value_value->to_html5_number) {
6224     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6225     $self->{onerror}->(node => $value,
6226     type => 'value lt min', ## TODOC: type
6227     level => $self->{level}->{warn});
6228     ## NOTE: Not an error.
6229     }
6230     }
6231    
6232     if (defined $max_value and defined $value_value) {
6233     if ($max_value->to_html5_number < $value_value->to_html5_number) {
6234     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6235     $self->{onerror}->(node => $value,
6236     type => 'value gt max', ## TODOC: type
6237     level => $self->{level}->{warn});
6238     ## NOTE: Not an error.
6239     }
6240     }
6241     } elsif (defined $element_state->{number_value}->{min} or
6242     defined $element_state->{number_value}->{max}) {
6243     my $min_value = $element_state->{number_value}->{min};
6244     my $max_value = $element_state->{number_value}->{max};
6245     my $value_value = $element_state->{number_value}->{value};
6246    
6247     if (defined $min_value and defined $max_value) {
6248     if ($min_value > $max_value) {
6249     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6250     $self->{onerror}->(node => $max,
6251     type => 'max lt min', ## TODOC: type
6252     level => $self->{level}->{must});
6253     }
6254     }
6255    
6256     if (defined $min_value and defined $value_value) {
6257     if ($min_value > $value_value) {
6258     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6259     $self->{onerror}->(node => $value,
6260     type => 'value lt min', ## TODOC: type
6261     level => $self->{level}->{warn});
6262     ## NOTE: Not an error.
6263     }
6264     }
6265    
6266     if (defined $max_value and defined $value_value) {
6267     if ($max_value < $value_value) {
6268     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6269     $self->{onerror}->(node => $value,
6270     type => 'value gt max', ## TODOC: type
6271     level => $self->{level}->{warn});
6272     ## NOTE: Not an error.
6273     }
6274     }
6275     }
6276 wakaba 1.150
6277 wakaba 1.168 ## TODO: Warn unless value = min * x where x is an integer.
6278    
6279 wakaba 1.150 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6280     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6281     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6282     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6283     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6284 wakaba 1.192 }, # check_attrs
6285 wakaba 1.66 check_start => sub {
6286     my ($self, $item, $element_state) = @_;
6287 wakaba 1.192 $FAECheckStart->($self, $item, $element_state);
6288     }, # check_start
6289     check_attrs2 => sub {
6290     my ($self, $item, $element_state) = @_;
6291     $FAECheckAttrs2->($self, $item, $element_state);
6292     }, # check_attrs2
6293     }; # input
6294 wakaba 1.52
6295 wakaba 1.178 ## XXXresource: Dimension attributes have requirements on width and
6296     ## height of referenced resource.
6297 wakaba 1.80
## |button| element.  Inherits the handlers of
## |%HTMLPhrasingContentChecker| and overrides the attribute checks
## and the start-of-element hook.
$Element->{$HTML_NS}->{button} = {
  %HTMLPhrasingContentChecker, ## ISSUE: -interactive?
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## ISSUE: In HTML5, no "MUST NOT" for using |action|, |method|,
    ## |enctype|, |target|, and |novalidate| with non-|submit|-|type|
    ## |button| elements.
    action => $HTMLURIAttrChecker,
    autofocus => $AutofocusAttrChecker,
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    enctype => $GetHTMLEnumeratedAttrChecker->(+{
      map { ($_ => 1) } (
        'application/x-www-form-urlencoded',
        'multipart/form-data',
        'text/plain',
      )
    }),
    form => $HTMLFormAttrChecker,
    method => $GetHTMLEnumeratedAttrChecker->(+{
      map { ($_ => 1) } qw(get post put delete)
    }),
    name => $FormControlNameAttrChecker,
    novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
    replace => $GetHTMLEnumeratedAttrChecker->(+{
      map { ($_ => 1) } qw(document values)
    }),
    target => $HTMLTargetAttrChecker,
    ## NOTE: According to Web Forms 2.0, the |button| element has a
    ## |template| attribute to support the |add| button type (as part
    ## of the repetition template feature).  It conflicts with the
    ## |template| global attribute introduced as part of the data
    ## template feature.
    ## NOTE: The |template| attribute as defined in Web Forms 2.0 has
    ## no author requirement.
    type => $GetHTMLEnumeratedAttrChecker->(+{
      map { ($_ => 1) } qw(button submit reset)
    }),
    value => sub { }, ## NOTE: No restriction.
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_WF2X) }
         qw(action enctype method target)),
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_WF2X) }
         qw(autofocus form)),
    (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
         qw(datafld dataformatas datasrc)),
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_M12N10_REC) }
         qw(name type value)),
    novalidate => FEATURE_HTML5_DEFAULT,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw(onblur onfocus tabindex)),
    (map { ($_ => FEATURE_WF2_INFORMATIVE) }
         qw(onformchange onforminput)),
    replace => FEATURE_WF2,
    template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO: dropped
  }),
  check_start => sub {
    ## Runs the shared |$FAECheckStart| hook, then registers how the
    ## URL-valued attributes of this element are used.
    my ($self, $item, $element_state) = @_;
    $FAECheckStart->($self, $item, $element_state);

    ## ISSUE: "The value attribute must not be present unless the form
    ## [content] attribute is present.": Wrong?

    $element_state->{uri_info}{action}{type}{action} = 1;
    $element_state->{uri_info}{datasrc}{type}{resource} = 1;
    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  }, # check_start
  check_attrs2 => sub {
    ## Thin wrapper around the shared |$FAECheckAttrs2| hook.
    my ($self, $item, $element_state) = @_;
    $FAECheckAttrs2->($self, $item, $element_state);
  }, # check_attrs2
}; # button
6377    
## |label| element.  Inherits the handlers of
## |%HTMLPhrasingContentChecker|; the start and end hooks maintain the
## |has_label|, |has_labelable| and |label_for| flags of the checker
## while the content of the element is being checked.
$Element->{$HTML_NS}->{label} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    for => sub {
      my ($self, $attr) = @_;

      ## NOTE: MUST be an ID of a labelable element.  The reference is
      ## only queued in |$self->{idref}| here.
      push @{ $self->{idref} }, ['labelable', $attr->value, $attr];
    },
    form => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_WF2 | FEATURE_M12N10_REC,
    for => FEATURE_HTML5_REC,
    form => FEATURE_HTML5_LC,
    lang => FEATURE_HTML5_REC,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw(onblur onfocus)),
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## A |label| element is not allowed within this element.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {label => 1}});

    ## Meaning of the flags:
    ##
    ## - |has_label| is true if there is at least an ancestor |label|
    ##   element.
    ## - |has_labelable| is equal to 1 if there is an ancestor |label|
    ##   element with its |for| attribute specified.  It is equal to 2
    ##   if there is an ancestor |label| element with its |for|
    ##   attribute unspecified but there is an associated form control
    ##   element.

    ## Remember the current flag values such that |check_end| can
    ## restore them.
    $element_state->{has_label_original} = $self->{flag}{has_label};
    $element_state->{has_labelable_original} = $self->{flag}{has_labelable};
    $element_state->{label_for_original} = $self->{flag}{label_for};

    ## Then set the flags for this element.
    $self->{flag}{has_label} = 1;
    $self->{flag}{has_labelable}
        = $item->{node}->has_attribute_ns (undef, 'for') ? 1 : 0;
    $self->{flag}{label_for}
        = $item->{node}->get_attribute_ns (undef, 'for');

    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  }, # check_start
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## has for="" but no labelable
    $self->{flag}{has_labelable} = $element_state->{has_labelable_original}
        if $self->{flag}{has_labelable} == 1;
    $element_state->{has_label_original}
        or delete $self->{flag}{has_label};
    $self->{flag}{label_for} = $element_state->{label_for_original};

    ## TODO: Warn if no labelable descendant? <input type=hidden>?

    ## NOTE: |<label for=a><input id=a></label>| is non-conforming.

    $HTMLPhrasingContentChecker{check_end}->(@_);
  }, # check_end
}; # label
6445 wakaba 1.52
## |select| element.  Inherits the handlers of |%HTMLChecker|; only
## |option| and |optgroup| element children are accepted and
## significant text is rejected.
$Element->{$HTML_NS}->{select} = {
  %HTMLChecker,
  ## ISSUE: HTML5 has no requirement like these:
  ## TODO: author should SELECTED at least one OPTION in non-MULTIPLE case [HTML4].
  ## TODO: more than one OPTION with SELECTED in non-MULTIPLE case is "error" [HTML4]
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->({
    autofocus => $AutofocusAttrChecker,
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    form => $HTMLFormAttrChecker,
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
    name => $FormControlNameAttrChecker,
    ## TODO: tests for on*
    (map { ($_ => $HTMLEventHandlerAttrChecker) }
         qw(onformchange onforminput oninput oninvalid)),
    size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_WF2,
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_WF2X) }
         qw(autofocus form)),
    (map { ($_ => FEATURE_WF2) }
         qw(data oninput oninvalid)),
    (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
         qw(datafld dataformatas datasrc)),
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_M12N10_REC) }
         qw(multiple name size)),
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         qw(onblur onchange onfocus tabindex)),
    (map { ($_ => FEATURE_WF2_INFORMATIVE) }
         qw(onformchange onforminput)),
    (map { ($_ => FEATURE_HTML20_RFC) }
         qw(sdaform sdapref)),
  }),
  check_start => sub {
    ## Runs the shared |$FAECheckStart| hook, then registers how the
    ## URL-valued attributes of this element are used.
    my ($self, $item, $element_state) = @_;
    $FAECheckStart->($self, $item, $element_state);

    $element_state->{uri_info}{data}{type}{resource} = 1;
    $element_state->{uri_info}{datasrc}{type}{resource} = 1;
    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  }, # check_start
  check_attrs2 => sub {
    ## Thin wrapper around the shared |$FAECheckAttrs2| hook.
    my ($self, $item, $element_state) = @_;
    $FAECheckAttrs2->($self, $item, $element_state);
  }, # check_attrs2
  check_child_element => sub {
    ## NOTE: (option | optgroup)*

    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif (not ($self->{plus_elements}{$child_nsuri}{$child_ln} or
                  ($child_nsuri eq $HTML_NS and
                   {option => 1, optgroup => 1}->{$child_ln}))) {
      ## Neither explicitly allowed as a "plus" element nor an HTML
      ## |option| / |optgroup| element.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}{must});
    }
  }, # check_child_element
  check_child_text => sub {
    ## Significant text is not allowed as a child.
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  }, # check_child_text
}; # select
6535 wakaba 1.1
## |datalist| element.  The content is either phrasing content or a
## list of |option| elements; which one is decided by the first child
## and remembered in |$element_state->{phase}|.
$Element->{$HTML_NS}->{datalist} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
  }, {
    %HTMLAttrStatus,
    data => FEATURE_WF2,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{phase} = 'any'; # any | phrasing | option

    $element_state->{uri_info}{data}{type}{resource} = 1;
    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;

    $element_state->{id_type} = 'datalist';
  }, # check_start
  ## NOTE: phrasing | option*
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $phase = $element_state->{phase};
    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Explicitly allowed; the phase is left unchanged.
    } elsif ($phase eq 'phrasing') {
      ## Only phrasing content may follow.
      unless ($HTMLPhrasingContent->{$child_nsuri}{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:phrasing',
                           level => $self->{level}{must});
      }
    } elsif ($phase eq 'option') {
      ## Only HTML |option| elements may follow.
      unless ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}{must});
      }
    } elsif ($phase eq 'any') {
      ## The first child element decides the phase.
      if ($HTMLPhrasingContent->{$child_nsuri}{$child_ln}) {
        $element_state->{phase} = 'phrasing';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $element_state->{phase} = 'option';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}{must});
      }
    } else {
      die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
    }
  }, # check_child_element
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant and $element_state->{phase} ne 'phrasing') {
      if ($element_state->{phase} eq 'any') {
        ## Significant text as the first content selects the phrasing
        ## phase.
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}{must});
      }
    }
  }, # check_child_text
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{phase} eq 'phrasing') {
      ## NOTE: Since the content model explicitly allows a |datalist|
      ## element being empty, we don't raise "no significant content"
      ## error for this element when there is no element.  (We should
      ## raise an error for |<datalist><br></datalist>|, however.)
      ## NOTE: As a side-effect, when the |datalist| element only
      ## contains non-conforming content, then the |phase| flag has not
      ## changed from |any|, no "no significant content" error is
      ## raised neither.
      return $HTMLChecker{check_end}->(@_);
    }

    if ($element_state->{has_significant}) {
      $item->{real_parent_state}{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'no significant content',
                         level => $self->{level}{should});
    }
  }, # check_end
}; # datalist
6635 wakaba 1.49
## |optgroup| element.  Inherits the handlers of |%HTMLChecker|; the
## |label| attribute is required, only |option| element children are
## accepted and significant text is rejected.
$Element->{$HTML_NS}->{optgroup} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub { },
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    label => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
  }),
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    ## The |label| attribute must be specified.
    $item->{node}->has_attribute_ns (undef, 'label')
        or $self->{onerror}->(node => $item->{node},
                              type => 'attribute missing',
                              text => 'label',
                              level => $self->{level}{must});
  }, # check_attrs2
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
    } elsif (not ($self->{plus_elements}{$child_nsuri}{$child_ln} or
                  ($child_nsuri eq $HTML_NS and $child_ln eq 'option'))) {
      ## Neither explicitly allowed as a "plus" element nor an HTML
      ## |option| element.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}{must});
    }
  }, # check_child_element
  check_child_text => sub {
    ## Significant text is not allowed as a child.
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}{must})
        if $has_significant;
  }, # check_child_text
}; # optgroup
6684    
## |option| element.  Inherits the handlers of |%HTMLTextChecker| and
## only overrides the status and the attribute checks.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub { }, ## NOTE: No restriction.
    selected => $GetHTMLBooleanAttrChecker->('selected'), ## ISSUE: Not a "boolean attribute"
    value => sub { }, ## NOTE: No restriction.
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    (map { ($_ => FEATURE_HTML20_RFC) }
         qw(sdaform sdapref)),
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_M12N10_REC) }
         qw(label selected value)),
  }),
}; # option
6705 wakaba 1.49
## |textarea| element.
##
## Content checks are inherited from %HTMLTextChecker.  |check_attrs|
## is built from two tables: attribute name => syntax checker, and
## attribute name => feature status.  |check_start| and |check_attrs2|
## additionally delegate to $FAECheckStart / $FAECheckAttrs2.
$Element->{$HTML_NS}->{textarea} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
    autofocus => $AutofocusAttrChecker,
    cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    form => $HTMLFormAttrChecker,
    ## TODO: inputmode [WF2]
    maxlength => sub {
      my ($self, $attr, $item, $element_state) = @_;

      ## First validate the attribute value as a non-negative integer
      ## (any such integer is acceptable).
      $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);

      ## Then, if a number can be extracted from the value, compare it
      ## with the length of the element's text content.
      if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
        ## NOTE: Applying the rules for parsing non-negative integers
        ## results in a number.
        my $max_allowed_value_length = 0+$1;

        ## ISSUE: "The the purposes of this requirement," (typo)

        ## ISSUE: This constraint is applied w/o CRLF normalization to
        ## |value| attribute, but w/ CRLF normalization to
        ## concept-value.
        my $value = $item->{node}->text_content;
        if (defined $value) {
          my $codepoint_length = length $value;

          if ($codepoint_length > $max_allowed_value_length) {
            $self->{onerror}->(node => $item->{node},
                               type => 'value too long',
                               level => $self->{level}->{must});
          }
        }
      }
    },
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
    ## NOTE: |oninput| used to be listed twice in this table; the
    ## redundant second entry (same checker) has been removed.
    oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
    oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
    pattern => $PatternAttrChecker,
    placeholder => $PlaceholderAttrChecker,
    readonly => $GetHTMLBooleanAttrChecker->('readonly'),
    required => $GetHTMLBooleanAttrChecker->('required'),
    rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
    wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
    'accept-charset' => FEATURE_HTML2X_RFC,
    accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
    autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
    cols => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    form => FEATURE_HTML5_LC | FEATURE_WF2X,
    inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
    lang => FEATURE_HTML5_REC,
    maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
    onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
    oninput => FEATURE_WF2, ## TODO: tests
    oninvalid => FEATURE_WF2, ## TODO: tests
    onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
    placeholder => FEATURE_HTML5_LC,
    readonly => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    required => FEATURE_HTML5_LC | FEATURE_WF2X,
    rows => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    wrap => FEATURE_HTML5_LC | FEATURE_WF2X,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $FAECheckStart->($self, $item, $element_state);

    ## Flag the |data|, |template| and |ref| URI slots as "resource".
    $element_state->{uri_info}->{data}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    ## |pattern| without |title|: SHOULD-level error.
    if ($item->{node}->has_attribute_ns (undef, 'pattern') and
        not $item->{node}->has_attribute_ns (undef, 'title')) {
      ## NOTE: WF2 (dropped by HTML5)
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'title',
                         level => $self->{level}->{should});
    }

    ## |wrap="hard"| (compared ASCII case-insensitively) without
    ## |cols|: MUST-level error.
    unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
      my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
      if (defined $wrap) {
        $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
        if ($wrap eq 'hard') {
          $self->{onerror}->(node => $item->{node},
                             type => 'attribute missing',
                             text => 'cols',
                             level => $self->{level}->{must});
        }
      }
    }

    $FAECheckAttrs2->($self, $item, $element_state);
  }, # check_attrs2
}; # textarea
6827    
## |keygen| element.
##
## Content checks are inherited from %HTMLEmptyChecker.  The start and
## cross-attribute hooks forward to $FAECheckStart / $FAECheckAttrs2.
{
  my %attr_checker = (
    autofocus => $AutofocusAttrChecker,
    challenge => sub { }, ## No constraints.
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    form => $HTMLFormAttrChecker,
    keytype => $GetHTMLEnumeratedAttrChecker->({rsa => 1}),
    name => $FormControlNameAttrChecker,
  );

  my %attr_status = (
    %HTMLAttrStatus,
    autofocus => FEATURE_HTML5_LC,
    challenge => FEATURE_HTML5_FD,
    disabled => FEATURE_HTML5_LC,
    form => FEATURE_HTML5_LC,
    keytype => FEATURE_HTML5_FD,
    name => FEATURE_HTML5_LC,
  );

  $Element->{$HTML_NS}->{keygen} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_FD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($checker, $el_item, $state) = @_;
      $FAECheckStart->($checker, $el_item, $state);

      ## Flag the |template| and |ref| URI slots as "resource".
      $state->{uri_info}->{$_}->{type}->{resource} = 1
          for 'template', 'ref';
    }, # check_start
    check_attrs2 => sub {
      my ($checker, $el_item, $state) = @_;
      $FAECheckAttrs2->($checker, $el_item, $state);
    }, # check_attrs2
  };
} # keygen
6859 wakaba 1.49
## |output| element.
##
## Content checks are inherited from %HTMLPhrasingContentChecker.
{
  ## |for|: an unordered set of unique space-separated tokens.  Each
  ## distinct token is queued on |$self->{idref}|; a repeated token
  ## is reported as 'duplicate token'.
  my $for_checker = sub {
    my ($self, $attr) = @_;

    my %seen;
    for my $token (grep { length }
                   split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
      } else {
        $seen{$token} = 1;
        push @{$self->{idref}}, ['any', $token, $attr];
      }
    }
  };

  my %attr_checker = (
    for => $for_checker,
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
  );

  my %attr_status = (
    %HTMLAttrStatus,
    for => FEATURE_HTML5_LC | FEATURE_WF2X,
    form => FEATURE_HTML5_LC | FEATURE_WF2X,
    name => FEATURE_HTML5_LC | FEATURE_WF2X,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  );

  $Element->{$HTML_NS}->{output} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_LC | FEATURE_WF2X,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
6896    
## |isindex| element (deprecated; see the |status| flags).
##
## Content checks are inherited from %HTMLEmptyChecker.
{
  my %attr_checker = (
    prompt => sub {}, ## NOTE: Text [M12N]
  );

  my %attr_status = (
    %HTMLAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    prompt => FEATURE_M12N10_REC_DEPRECATED,
    sdapref => FEATURE_HTML20_RFC,
    style => FEATURE_HTML5_REC,
    title => FEATURE_HTML5_REC,
  );

  $Element->{$HTML_NS}->{isindex} = {
    %HTMLEmptyChecker,
    status => FEATURE_M12N10_REC_DEPRECATED |
        Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    ## TODO: Tests
    ## TODO: Tests for <nest/> in <isindex>
    check_start => sub {
      my ($checker, $el_item, $state) = @_;

      ## |action| is flagged as an "action" URI; |template| and |ref|
      ## are flagged as "resource".
      my $uri_info = $state->{uri_info} ||= {};
      $uri_info->{action}->{type}->{action} = 1;
      $uri_info->{template}->{type}->{resource} = 1;
      $uri_info->{ref}->{type}->{resource} = 1;
    },
  };
}
6924 wakaba 1.49
## |script| element.
##
## With a |src| attribute the element is required to be empty.
## Without it, a script type is derived from |type| / |language|, the
## text children are accumulated, and at the end the text is either
## handed to |onsubdoc| or (for XML types) reported as unchecked.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    charset => sub {
      my ($self, $attr) = @_;

      ## |charset| is reported when the owner has no |src|.
      $self->{onerror}->(type => 'attribute not allowed',
                         node => $attr,
                         level => $self->{level}->{must})
          unless $attr->owner_element->has_attribute_ns (undef, 'src');

      ## XXXresource: MUST match the charset of the referenced
      ## resource (HTML5 revision 2967).

      $HTMLCharsetChecker->($attr->value, @_);
    },
    language => sub {}, ## NOTE: No syntax constraint according to HTML4.
    src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
  }, {
    %HTMLAttrStatus,
    async => FEATURE_HTML5_WD,
    charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    href => FEATURE_RDFA_REC,
    id => FEATURE_HTML5_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    ## |defer| without |src| is a MUST-level error.
    my $script_el = $item->{node};
    my $defer_without_src
        = ($script_el->has_attribute_ns (undef, 'defer') and
           not $script_el->has_attribute_ns (undef, 'src'));
    if ($defer_without_src) {
      $self->{onerror}->(node => $script_el,
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{must});
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $script_el = $item->{node};

    if ($script_el->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type_attr = $script_el->get_attribute_ns (undef, 'type');
      my $language_attr = $script_el->get_attribute_ns (undef, 'language');

      ## An empty |type| or |language| selects the default; otherwise
      ## |type| wins over |language|, and the default applies when
      ## neither is specified.
      my $has_empty_decl
          = grep { defined $_ and $_ eq '' } $type_attr, $language_attr;
      my $type
          = $has_empty_decl         ? 'text/javascript'
          : defined $type_attr      ? $type_attr
          : defined $language_attr  ? 'text/' . $language_attr
          :                           'text/javascript';

      ## Reduce to a lowercased "type/subtype" when the value parses.
      if ($type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
        $type = "$1/$2";
        $type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
        ## TODO: Though we strip parameter here, it should not be ignored for the purpose of conformance checking...
      }
      $element_state->{script_type} = $type;
    }

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for 'src', 'template', 'ref';

    ## Accumulator for the text children (see |check_child_text|).
    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:empty',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    my $violates_empty
        = ($has_significant and $element_state->{must_be_empty});
    if ($violates_empty) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed:empty',
                         level => $self->{level}->{must});
    }
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{must_be_empty}) {
      my $script_type = $element_state->{script_type};
      if ($script_type =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML script lang',
                           text => $element_state->{script_type},
                           level => $self->{level}->{uncertain});
        ## ISSUE: Should we raise some kind of error for
        ## <script type="text/xml">aaaaa</script>?
        ## NOTE: ^^^ This is why we throw an "uncertain" error.
      } else {
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $element_state->{script_type},
                             is_char_string => 1});
      }

      $HTMLChecker{check_end}->(@_);
    }
  },
  ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
  ## NOTE: "When used to include script data, the script data must be embedded
  ## inline, the format of the data must be given using the type attribute,
  ## and the src attribute must not be specified." - not testable.
  ## TODO: It would be possible to err <script type=text/plain src=...>
};
7064 wakaba 1.25 ## ISSUE: Significant check and text child node
7065 wakaba 1.1
7066     ## NOTE: When script is disabled.
## |noscript| element.
##
## Behaviour depends on |$self->{flag}->{in_head}|: inside |head| only
## a restricted set of children is accepted and text is not allowed;
## elsewhere the element forwards to %HTMLTransparentChecker and
## forbids nested |noscript| through the minus-elements mechanism.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $noscript_el = $item->{node};

    ## |noscript| is reported when the owner document is not an HTML
    ## document.
    $self->{onerror}->(node => $noscript_el, type => 'in XML:noscript',
                       level => $self->{level}->{must})
        unless $noscript_el->owner_document->manakai_is_html;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {noscript => 1}})
        unless $self->{flag}->{in_head};

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for 'template', 'ref';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Outside |head|: plain transparent-content rules.
    return $HTMLTransparentChecker{check_child_element}->(@_)
        unless $self->{flag}->{in_head};

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      ## Accepted in |head|: |link|; |style| without |scoped|; |meta|
      ## with an |http-equiv| other than "content-type".
      my $allowed = 0;
      if ($child_nsuri eq $HTML_NS) {
        if ($child_ln eq 'link') {
          $allowed = 1;
        } elsif ($child_ln eq 'style') {
          $allowed = not $child_el->has_attribute_ns (undef, 'scoped');
        } elsif ($child_ln eq 'meta') {
          my $http_equiv_attr
              = $child_el->get_attribute_node_ns (undef, 'http-equiv');
          ## TODO: case
          $allowed = ($http_equiv_attr and
                      lc $http_equiv_attr->value ne 'content-type');
        }
      }

      unless ($allowed) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head noscript',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    return $HTMLTransparentChecker{check_child_text}->(@_)
        unless $self->{flag}->{in_head};

    ## Inside |head| significant text is not allowed.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $finish = $self->{flag}->{in_head}
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $finish->(@_);
  },
};
7158 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
7159 wakaba 1.1
## |event-source| element (hyphenated spelling; status: LC, dropped).
##
## Content checks are inherited from %HTMLEmptyChecker.
{
  my %attr_checker = (src => $HTMLURIAttrChecker);
  my %attr_status = (
    %HTMLAttrStatus,
    src => FEATURE_HTML5_LC_DROPPED,
  );

  $Element->{$HTML_NS}->{'event-source'} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_LC_DROPPED,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($checker, $el_item, $state) = @_;

      ## Flag the |src|, |template| and |ref| URI slots as "resource".
      $state->{uri_info}->{$_}->{type}->{resource} = 1
          for 'src', 'template', 'ref';
    },
  };
}
7177    
## |eventsource| element (status: dropped from HTML5).
##
## Content checks are inherited from %HTMLEmptyChecker.
{
  my %attr_checker = (src => $HTMLURIAttrChecker);
  my %attr_status = (
    %HTMLAttrStatus,
    src => FEATURE_HTML5_DROPPED,
  );

  $Element->{$HTML_NS}->{eventsource} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_DROPPED,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($checker, $el_item, $state) = @_;

      ## Flag the |src|, |template| and |ref| URI slots as "resource".
      $state->{uri_info}->{$_}->{type}->{resource} = 1
          for 'src', 'template', 'ref';
    },
  };
}
7195    
## |details| element.
##
## Starts from a shallow copy of the |fieldset| definition and
## overrides |status| and |check_attrs|.
{
  my $fieldset_def = $Element->{$HTML_NS}->{fieldset};

  my %attr_checker = (
    open => $GetHTMLBooleanAttrChecker->('open'),
  );
  my %attr_status = (
    %HTMLAttrStatus,
    open => FEATURE_HTML5_LC,
  );

  $Element->{$HTML_NS}->{details} = {
    %$fieldset_def,
    status => FEATURE_HTML5_LC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
}
7206    
## |datagrid| element.
##
## Content checks are inherited from %HTMLFlowContentChecker.  While
## the element is open, |a| and |datagrid| are registered as minus
## elements; the registration is undone in |check_end|.
{
  my %attr_checker = (
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  );
  my %attr_status = (
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  );

  $Element->{$HTML_NS}->{datagrid} = {
    %HTMLFlowContentChecker,
    status => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($checker, $el_item, $state) = @_;

      my %excluded = ($HTML_NS => {a => 1, datagrid => 1});
      $checker->_add_minus_elements ($state, \%excluded);

      $state->{uri_info}->{$_}->{type}->{resource} = 1
          for 'template', 'ref';
    },
    check_end => sub {
      my ($checker, $el_item, $state) = @_;
      $checker->_remove_minus_elements ($state);

      $HTMLFlowContentChecker{check_end}->(@_);
    },
  };
}
7234    
## |command| element.
##
## Content checks are inherited from %HTMLEmptyChecker.
{
  ## |type| must be exactly one of these keywords.
  ## NOTE(review): the comparison is case-sensitive, unlike the
  ## attributes checked through $GetHTMLEnumeratedAttrChecker --
  ## confirm whether that is intended.
  my $type_checker = sub {
    my ($self, $attr) = @_;
    my %is_known_type = (command => 1, checkbox => 1, radio => 1);
    my $value = $attr->value;
    $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                       level => $self->{level}->{must})
        unless $is_known_type{$value};
  };

  my %attr_checker = (
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    type => $type_checker,
  );

  my %attr_status = (
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_DROPPED, # HTML5 revision 3067
    disabled => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  );

  $Element->{$HTML_NS}->{command} = {
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_WD,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($checker, $el_item, $state) = @_;

      ## |icon| is flagged as an "embedded" URI; |template| and |ref|
      ## are flagged as "resource".
      $state->{uri_info}->{icon}->{type}->{embedded} = 1;
      $state->{uri_info}->{$_}->{type}->{resource} = 1
          for 'template', 'ref';
    },
  };
}
7271    
## |bb| element (status: dropped from HTML5).
##
## Content checks are inherited from %HTMLPhrasingContentChecker.
## While the element is open, $HTMLInteractiveContent is registered as
## the set of minus elements.
{
  my %attr_checker = (
    type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1}),
  );
  my %attr_status = (
    %HTMLAttrStatus,
    type => FEATURE_HTML5_DROPPED,
  );

  $Element->{$HTML_NS}->{bb} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DROPPED,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my ($checker, $el_item, $state) = @_;
      $checker->_add_minus_elements ($state, $HTMLInteractiveContent);

      $state->{uri_info}->{$_}->{type}->{resource} = 1
          for 'template', 'ref';
    },
    check_end => sub {
      my ($checker, $el_item, $state) = @_;
      $checker->_remove_minus_elements ($state);

      ## NOTE(review): the definition is based on
      ## %HTMLPhrasingContentChecker but finishes with the transparent
      ## checker's |check_end| -- confirm whether this is intended.
      $HTMLTransparentChecker{check_end}->(@_);
    },
  };
}
7295    
## |menu| element.
##
## The content model is tracked with |$element_state->{phase}|:
##
##   'li or phrasing' - initial state; nothing decisive seen yet.
##   'li'             - an |li| child was seen first; only |li|
##                      children are accepted from then on.
##   'phrasing'       - a phrasing element or significant text was
##                      seen first; |li| children are rejected.
##
## |$self->{flag}->{in_menu}| is set while the element is open and is
## removed in |check_end| unless it was already set on entry.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## ISSUE: <menu id=""><p contextmenu=""> match?  (In the current
    ## implementation, it does not match.)
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_DROPPED,
    ## This key used to be misspelled |compat|, so the |compact|
    ## attribute (which has a checker above) had no status entry.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';

    ## Remember the outer value so that nested |menu|s restore it
    ## correctly in |check_end|.
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    $element_state->{id_type} = 'menu';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      ## |li| child: fine in the 'li' phase, decides the phase when it
      ## is still open, and is an error in the 'phrasing' phase.
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      ## Phrasing child: the mirror image of the |li| case.
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Significant text counts as phrasing content.
    if ($has_significant) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
7388    
## |datatemplate| element (status: at risk).
##
## Based on %HTMLChecker.  Apart from plus elements, the only accepted
## child is an HTML |rule| element; significant text is not allowed.
## The definition is also marked |is_xml_root|.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($checker, $el_item, $child, $child_ns, $child_name,
        $child_is_transparent, $state) = @_;
    if ($checker->{minus_elements}->{$child_ns}->{$child_name} and
        $IsInHTMLInteractiveContent->($child, $child_ns, $child_name)) {
      $checker->{onerror}->(node => $child,
                            type => 'element not allowed:minus',
                            level => $checker->{level}->{must});
    } elsif ($checker->{plus_elements}->{$child_ns}->{$child_name}) {
      #
    } elsif (not ($child_ns eq $HTML_NS and $child_name eq 'rule')) {
      $checker->{onerror}->(node => $child,
                            type => 'element not allowed:datatemplate',
                            level => $checker->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($checker, $el_item, $text_node, $has_significant, $state) = @_;
    $checker->{onerror}->(node => $text_node,
                          type => 'character not allowed',
                          level => $checker->{level}->{must})
        if $has_significant;
  },
  is_xml_root => 1,
};
7419    
## |rule| element (HTML5, at risk).
##
## While a |rule| element is open, the |nest| element is added to the
## set of additionally allowed elements and the |in_rule| flag is set.
## Both are restored when the element ends.
$Element->{$HTML_NS}->{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,

  check_attrs => $GetHTMLAttrsChecker->(
    {
      condition => $HTMLSelectorsAttrChecker,
      mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
    },
    {
      %HTMLAttrStatus,
      condition => FEATURE_HTML5_AT_RISK,
      mode => FEATURE_HTML5_AT_RISK,
    },
  ),

  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_plus_elements($element_state, {$HTML_NS => {nest => 1}});

    ## Mark the |template| and |ref| attributes as resource-type URIs in
    ## this element's |uri_info|.
    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }

    ## Remember the flag value seen on entry so that |check_end| can tell
    ## whether this element is the outermost |rule|.
    $element_state->{in_rule_original} = $self->{flag}->{in_rule};
    $self->{flag}->{in_rule} = 1;
  },

  ## NOTE: The content of |rule| "MAY be anything that, when the parent
  ## |datatemplate| is applied to some conforming data, results in a
  ## conforming DOM tree."  That condition is not checked, so children
  ## and text are accepted unconditionally.
  check_child_element => sub { },
  check_child_text => sub { },

  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_plus_elements($element_state);

    ## Clear the flag only if it was not already set on entry.
    if (!$element_state->{in_rule_original}) {
      delete $self->{flag}->{in_rule};
    }

    return $HTMLChecker{check_end}->(@_);
  },
}; # rule
7455    
## |nest| element (HTML5, at risk); uses the empty-element checker.
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,

  check_attrs => $GetHTMLAttrsChecker->(
    {
      filter => $HTMLSelectorsAttrChecker,

      ## |mode| must be a non-empty string that contains none of the
      ## characters U+0009, U+000A, U+000C, U+000D, and U+0020.
      mode => sub {
        my ($self, $attr) = @_;
        unless ($attr->value =~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/) {
          $self->{onerror}->(node => $attr,
                             type => 'mode:syntax error',
                             level => $self->{level}->{must});
        }
      },
    },
    {
      %HTMLAttrStatus,
      filter => FEATURE_HTML5_AT_RISK,
      mode => FEATURE_HTML5_AT_RISK,
    },
  ),
}; # nest
7475    
## |legend| element.
##
## The content model depends on the parent: if the parent element's
## state has |in_figure| set, children and text are checked as flow
## content; otherwise they are checked as phrasing content.  The
## |figure| element is added to the excluded ("minus") elements while
## the |legend| is open.
$Element->{$HTML_NS}->{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,

  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## XXX The |align| value check is disabled for now:
      # align => $GetHTMLEnumeratedAttrChecker->({
      #   top => 1, bottom => 1, left => 1, right => 1,
      # }),
      form => $HTMLFormAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC_DEPRECATED,
      form => FEATURE_HTML5_DROPPED,
      lang => FEATURE_HTML5_REC,
    },
  ),

  check_child_element => sub {
    my ($self, $item) = @_;

    ## XXX This does not work for |<legend><ins><blockquote>|.
    ## NOTE(review): presumably because |in_figure| is looked up on the
    ## immediate parent state only -- confirm.
    my $content_model = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker
        : \%HTMLPhrasingContentChecker;
    $content_model->{check_child_element}->(@_);
  },

  check_child_text => sub {
    my ($self, $item) = @_;

    my $content_model = $item->{parent_state}->{in_figure}
        ? \%HTMLFlowContentChecker
        : \%HTMLPhrasingContentChecker;
    $content_model->{check_child_text}->(@_);
  },

  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |figure| is not allowed while this |legend| is open.
    $self->_add_minus_elements($element_state, {$HTML_NS => {figure => 1}});

    return $HTMLFlowContentChecker{check_start}->(@_);
  },

  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusion registered by |check_start|.
    $self->_remove_minus_elements($element_state);

    return $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # legend
7524 wakaba 1.1
## |div| element: flow content, with the HTML4 |align| attribute checked
## against its enumerated values.
$Element->{$HTML_NS}->{div} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,

  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        left => 1, center => 1, right => 1, justify => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      lang => FEATURE_HTML5_REC,
    },
  ),

  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Mark |datasrc|, |template|, and |ref| as resource-type URIs in
    ## this element's |uri_info|.
    for my $attr_name (qw(datasrc template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
}; # div
7549    
## |center| element: deprecated (XHTML m12n 1.0); flow content.  No
## attribute has a dedicated value checker; only attribute statuses are
## declared.
$Element->{$HTML_NS}->{center} = {
  %HTMLFlowContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
}; # center
7559    
## |font| element: dropped from HTML5, deprecated in XHTML m12n 1.0;
## checked with the transparent content model.
##
## NOTE: While |font| was part of the HTML5 specification, it was
## allowed only in documents carrying the WYSIWYG signature.  The
## checker does not look for that signature: it has been dropped as
## well and was never implemented.  Besides, every |font| element
## already triggers an "element not defined" error, so no additional
## error is necessary.
$Element->{$HTML_NS}->{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |size|, |color|, |face|
    },
    {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      color => FEATURE_M12N10_REC_DEPRECATED,
      dir => FEATURE_HTML5_REC,
      face => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_REC,
      lang => FEATURE_HTML5_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
      style => FEATURE_HTML5_REC,
      title => FEATURE_HTML5_REC,
    },
  ),
}; # font
7584 wakaba 1.49
## |basefont| element: deprecated (XHTML m12n 1.0); uses the
## empty-element checker.  Attribute values are not validated yet.
$Element->{$HTML_NS}->{basefont} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: color, face, size
    },
    {
      %HTMLAttrStatus,
      color => FEATURE_M12N10_REC_DEPRECATED,
      face => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
}; # basefont
7598 wakaba 1.64
7599 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
7600 wakaba 1.188 ## class title id cols rows style(x10)
7601    
## |frameset| element (XHTML m12n 1.0).
##
## NOTE(review): the XXX markers presumably flag the content model and
## the attribute lists as provisional -- confirm before relying on them.
$Element->{$HTML_NS}->{frameset} = {
  %HTMLEmptyChecker, # XXX
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## XXX
      ## All of these event handler attributes share one value checker.
      (map { ($_ => $HTMLEventHandlerAttrChecker) } qw(
        onafterprint onbeforeprint onbeforeunload onblur onerror onfocus
        onhashchange onload onmessage onoffline ononline onpopstate
        onredo onresize onstorage onundo onunload
      )),
    },
    {
      %HTMLAttrStatus,
      ## XXX
      onload => FEATURE_M12N10_REC,
      onunload => FEATURE_M12N10_REC,
    },
  ),
}; # frameset
7631    
7632 wakaba 1.49 ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7633     ## noframes Common, lang(xhtml10)
7634    
7635 wakaba 1.100 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7636 wakaba 1.56
7637 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7638     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7639     ## xmp, listing sdapref [HTML 2.0]
7640    
7641 wakaba 1.56 =pod
7642    
7643 wakaba 1.61 HTML 2.0 nextid @n
7644    
7645     RFC 2659: CERTS CRYPTOPTS
7646    
7647     ISO-HTML: pre-html, divN
7648 wakaba 1.82
7649     XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7650     di (Common), nl (Common), handler (Common, type), standby (Common),
7651     summary (Common)
7652    
7653 wakaba 1.97 Access & XHTML2: access (LC)
7654 wakaba 1.82
7655     XML Events & XForms (for XHTML2 support; very, very low priority)
7656 wakaba 1.61
7657 wakaba 1.56 =cut
7658 wakaba 1.61
7659     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7660     ## We added them only to |a|. |link| and |form| might also allow them
7661     ## in theory.
7662 wakaba 1.1
## Flag the HTML namespace entry of the content checker as loaded.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

1; ## The module file must end with a true value.

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24