/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.168 - (hide annotations) (download)
Mon Dec 15 06:41:20 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.167: +158 -170 lines
++ whatpm/t/dom-conformance/ChangeLog	15 Dec 2008 06:41:13 -0000
2008-12-15  Wakaba  <wakaba@suika.fam.cx>

	* html-flows-1.dat: Updated test results and added some new tests.

	* html-form-input-1.dat: Added some new tests.

++ whatpm/Whatpm/ContentChecker/ChangeLog	15 Dec 2008 06:40:20 -0000
2008-12-15  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm ($HTMLDatetimeAttrChecker): Removed.
	($GetDateTimeAttrChecker): Added.
	($GetHTMLFloatingPointNumberAttrChecker): Set |number_value| for
	later use.
	(ins/@datetime, del/@datetime): Changed to use newer definition of
	"global date and time string".
	(input's date- and time-related, and type=number and type=range
	control types): Implemented value="", min="", and max="".

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5 wakaba 1.117 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6    
7 wakaba 1.1 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8    
## Feature statuses for HTML5.  Each constant is built from the flag
## constants of |Whatpm::ContentChecker|, combined with bitwise OR
## where more than one flag applies.

sub FEATURE_HTML5_COMPLETE () {
  ## NOTE: Part of HTML5, the implemented status.
  return Whatpm::ContentChecker::FEATURE_STATUS_REC ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ();
}

sub FEATURE_HTML5_CR () {
  ## NOTE: Part of HTML5, the awaiting implementation feedback status.
  return Whatpm::ContentChecker::FEATURE_STATUS_CR ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ();
}

sub FEATURE_HTML5_LC () {
  ## NOTE: Part of HTML5, the last call of comments status.
  return Whatpm::ContentChecker::FEATURE_STATUS_LC ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ();
}

sub FEATURE_HTML5_AT_RISK () {
  ## NOTE: Part of HTML5, but in the being considered for removal
  ## status.
  return Whatpm::ContentChecker::FEATURE_STATUS_WD ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ();
}

sub FEATURE_HTML5_WD () {
  ## NOTE: Part of HTML5, the working draft status.
  return Whatpm::ContentChecker::FEATURE_STATUS_WD ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ();
}

sub FEATURE_HTML5_FD () {
  ## NOTE: Part of HTML5, the first draft status.
  return Whatpm::ContentChecker::FEATURE_STATUS_WD ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ();
}

sub FEATURE_HTML5_DEFAULT () {
  ## NOTE: Part of HTML5, but not annotated.
  return Whatpm::ContentChecker::FEATURE_STATUS_WD ()
      | Whatpm::ContentChecker::FEATURE_ALLOWED ();
}

sub FEATURE_HTML5_DROPPED () {
  ## NOTE: Was part of HTML5, in a status before the last call of
  ## comments, but then dropped.
  return Whatpm::ContentChecker::FEATURE_STATUS_WD ();
}

sub FEATURE_HTML5_LC_DROPPED () {
  ## NOTE: Was part of HTML5, in the last call of comments status, but
  ## then dropped.
  return Whatpm::ContentChecker::FEATURE_STATUS_LC ();
}
55 wakaba 1.154
## Feature statuses for Web Forms 2.0 (WF2) and RDFa.

sub FEATURE_WF2X () {
  ## NOTE: Defined in WF2 (whether deprecated or not) and then
  ## incorporated into the HTML5 spec.
  return Whatpm::ContentChecker::FEATURE_STATUS_LC ();
}

sub FEATURE_WF2 () {
  ## NOTE: Features introduced or modified in WF2, which were not
  ## merged into HTML5.
  return Whatpm::ContentChecker::FEATURE_STATUS_LC ();
}

sub FEATURE_WF2_INFORMATIVE () {
  ## NOTE: Features mentioned in WF2's informative appendix A, which
  ## were not merged into HTML5.
  return Whatpm::ContentChecker::FEATURE_STATUS_LC ();
}

sub FEATURE_RDFA_REC () {
  return Whatpm::ContentChecker::FEATURE_STATUS_REC ();
}

sub FEATURE_RDFA_LC_DROPPED () {
  ## NOTE: The feature that was defined in a RDFa last call working
  ## draft, but then dropped.
  return Whatpm::ContentChecker::FEATURE_STATUS_LC ();
}
80 wakaba 1.58
## NOTE: The XHTML Role LCWD says almost nothing about how the |role|
## attribute can be used - the only requirement in that respect is:
## "the attribute MUST be referenced using its namespace-qualified form"
## (and this is a host language conformance requirement!).
sub FEATURE_ROLE_LC () {
  return Whatpm::ContentChecker::FEATURE_STATUS_LC ();
}

sub FEATURE_XHTML2_ED () {
  ## NOTE: XHTML 2.0 Editor's Draft, in which the namespace URI is
  ## "http://www.w3.org/1999/xhtml".
  return Whatpm::ContentChecker::FEATURE_STATUS_WD ();
}

sub FEATURE_XHTMLBASIC11_CR () {
  ## NOTE: XHTML Basic 1.1 Recommendation, new features (not in XHTML
  ## M12N).
  return Whatpm::ContentChecker::FEATURE_STATUS_REC ();
}

sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  ## NOTE: XHTML Basic 1.1 Recommendation, new but deprecated
  ## features.
  return Whatpm::ContentChecker::FEATURE_STATUS_REC ()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO ();
}

sub FEATURE_RUBY_REC () {
  return Whatpm::ContentChecker::FEATURE_STATUS_CR ();
}

sub FEATURE_M12N11_LC () {
  ## NOTE: XHTML M12N 1.1 Recommendation, new features (not in 1.0).
  return Whatpm::ContentChecker::FEATURE_STATUS_REC ();
}
115    
## NOTE: M12N10 status is based on its abstract module definition.
## It contains a number of problems.  (However, again, it's a REC!)
sub FEATURE_M12N10_REC () {
  ## NOTE: Oh, XHTML m12n 1.0 passed the CR phase!  W3C Process sucks!
  return Whatpm::ContentChecker::FEATURE_STATUS_REC ();
}

sub FEATURE_M12N10_REC_DEPRECATED () {
  return Whatpm::ContentChecker::FEATURE_STATUS_REC ()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO ();
}

## NOTE: XHTML10 status is based on its transitional and frameset DTDs
## (second edition).  Only attributes missing from the M12N10 abstract
## definition are added.
sub FEATURE_XHTML10_REC () {
  return Whatpm::ContentChecker::FEATURE_STATUS_CR ();
}

## NOTE: Diff from HTML4.
sub FEATURE_ISOHTML_PREPARATION () { ## Informative documentation
  return Whatpm::ContentChecker::FEATURE_STATUS_CR ();
}

## NOTE: HTML4 status is based on its transitional and frameset DTDs
## (HTML 4.01).  Only attributes missing from XHTML10 are added.
sub FEATURE_HTML4_REC_RESERVED () {
  return Whatpm::ContentChecker::FEATURE_STATUS_WD ();
}
144    
145     ## TODO: According to HTML4 definition, authors SHOULD use style sheets
146     ## rather than presentational attributes (deprecated or not deprecated).
147 wakaba 1.48
## NOTE: Diff from HTML4.
sub FEATURE_HTML32_REC_OBSOLETE () {
  ## NOTE: The deprecation is a lowercase normative "should".
  return Whatpm::ContentChecker::FEATURE_STATUS_CR ()
      | Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD ();
}

sub FEATURE_RFC2659 () { ## Experimental RFC
  return Whatpm::ContentChecker::FEATURE_STATUS_CR ();
}

## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
sub FEATURE_HTML2X_RFC () { ## Proposed Standard, obsolete
  return Whatpm::ContentChecker::FEATURE_STATUS_CR ();
}

## NOTE: Diff from HTML 2.0.
sub FEATURE_RFC1942 () { ## Experimental RFC, obsolete
  return Whatpm::ContentChecker::FEATURE_STATUS_CR ();
}

## NOTE: Diff from HTML 3.2.
sub FEATURE_HTML20_RFC () { ## Proposed Standard, obsolete
  return Whatpm::ContentChecker::FEATURE_STATUS_CR ();
}
173 wakaba 1.58
174 wakaba 1.29 ## December 2007 HTML5 Classification
175    
## Metadata content: {namespace URI => {local name => 1}}.
my $HTMLMetadataContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      title base link style script noscript
      event-source eventsource
      command datatemplate
    )),
    ## NOTE: A |meta| with no |name| element is not allowed as
    ## a metadata content other than |head| element.
    meta => 1,
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
189    
## Flow content: {namespace URI => {local name => 1}}.
my $HTMLFlowContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      section nav article blockquote aside
      h1 h2 h3 h4 h5 h6 header
      footer address p hr dialog pre
      ol ul dl figure map table
      form fieldset
    )),
    details => 1, ## ISSUE: "Flow element" in spec.
    datagrid => 1, ## ISSUE: "Flow element" in spec.
    datatemplate => 1,
    div => 1, ## ISSUE: No category in spec.
    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    style => 1,

    (map { ($_ => 1) } qw(
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo ruby
      script noscript event-source eventsource
      command bb
      input button label select datalist
      textarea output
    )),
    datagrid => 1, ## ISSUE: "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    (map { ($_ => 1) } qw(a ins del font)),

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
    menu => 1,

    (map { ($_ => 1) } qw(img iframe embed object video audio canvas)),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
232    
## Sectioning content: {namespace URI => {local name => 1}}.
my $HTMLSectioningContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(section nav article aside)),
    ## NOTE: |body| is only allowed in |html| element.
    body => 1,
  },
};
240    
## Sectioning roots: {namespace URI => {local name => 1}}.
my $HTMLSectioningRoot = {
  $HTML_NS => {
    map { ($_ => 1) } qw(blockquote datagrid figure td)
  },
};
246    
## Heading content: {namespace URI => {local name => 1}}.
my $HTMLHeadingContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw(h1 h2 h3 h4 h5 h6 header)
  },
};
252    
## Phrasing content: {namespace URI => {local name => 1}}.
my $HTMLPhrasingContent = {
  ## NOTE: All phrasing content is also flow content.
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo ruby
      script noscript event-source eventsource
      command bb
      input button label select datalist
      textarea output
    )),
    datagrid => 1, ## ISSUE: "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    (map { ($_ => 1) } qw(a ins del font)),

    ## NOTE: If there is a |menu| ancestor, phrasing.  Otherwise, flow.
    menu => 1,

    (map { ($_ => 1) } qw(img iframe embed object video audio canvas)),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## NOTE: And non-inter-element-whitespace text nodes.
};
284    
285 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
286 wakaba 1.29
## Interactive content: {namespace URI => {local name => 1}}.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw(
      a
      label input button select textarea
      details datagrid bb
    )),

    ## NOTE: When "controls" attribute is specified.
    video => 1, audio => 1,

    ## NOTE: When "type=toolbar" attribute is specified.
    menu => 1,
  },
};
300    
## NOTE: Labelable form-associated elements, as
## {namespace URI => {local name => 1}}.
my $LabelableFAE = {
  $HTML_NS => {
    map { ($_ => 1) } qw(input button select textarea)
  },
};
307    
308 wakaba 1.130 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
309    
310 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
311     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
312    
313     ## -- Common attribute syntax checkers
314    
315 wakaba 1.1 our $AttrChecker;
316 wakaba 1.82 our $AttrStatus;
317 wakaba 1.1
## Returns a checker for an enumerated attribute.  |$states| maps each
## known (lowercase) keyword to a positive number if it is conforming,
## or to a negative number if it is known but non-conforming.
my $GetHTMLEnumeratedAttrChecker = sub {
  my ($states) = @_; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    my $keyword = lc $attr->value; ## TODO: ASCII case insensitivity?
    my $state = $states->{$keyword};
    return if $state > 0;

    ## A known keyword with a non-positive state is non-conforming; an
    ## unknown keyword is invalid.
    my $type = $state ? 'enumerated:non-conforming' : 'enumerated:invalid';
    $self->{onerror}->(node => $attr, type => $type,
                       level => $self->{level}->{must});
  };
}; # $GetHTMLEnumeratedAttrChecker
334    
## Returns a checker for the boolean attribute |$local_name|.  The
## lowercased value must be empty or equal to |$local_name|.
my $GetHTMLBooleanAttrChecker = sub {
  my ($local_name) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $value = lc $attr->value; ## TODO: case
    return if $value eq '' or $value eq $local_name;
    $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                       level => $self->{level}->{must});
  };
}; # $GetHTMLBooleanAttrChecker
346    
## Unordered set of unique space-separated tokens.  If |$allowed_words|
## is defined, every token must also be a true key of that hash.
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($allowed_words) = @_;
  return sub {
    my ($self, $attr) = @_;
    my @tokens = grep { length }
        split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
    my %seen;
    for my $token (@tokens) {
      if ($seen{$token}++) {
        ## Second or later occurrence of the same token.
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
        next;
      }
      next if not defined $allowed_words or $allowed_words->{$token};
      $self->{onerror}->(node => $attr, type => 'word not allowed',
                         value => $token,
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
373 wakaba 1.8
## |rel| attribute (set of space separated tokens, whose allowed values
## are defined by the section on link types).
##
## Arguments: |$a_or_area| is used as the index into each link type's
## |effect| array; |$todo| and |$element_state| receive the results
## (|has_hyperlink_link_type|, |uri_info|, |link_rel|).
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Collect the distinct tokens.  A repeated token is an error, except
  ## for "up", which may be repeated.
  my %word;
  for my $token (grep { length }
                 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
      next;
    }
    next if $token eq 'up';
    $self->{onerror}->(node => $attr, type => 'duplicate token',
                       value => $token,
                       level => $self->{level}->{must});
  }
  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  my $is_hyperlink;
  my $is_resource;
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $word (keys %word) {
    my $def = $LinkType->{$word};
    unless (defined $def) {
      $self->{onerror}->(node => $attr,
                         type => 'unknown link type',
                         value => $word,
                         level => $self->{level}->{uncertain});
      next;
    }

    my $status = $def->{status};
    my $effect = $def->{effect}->[$a_or_area];

    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr,
                           type => 'link type:proposed',
                           value => $word,
                           level => $self->{level}->{should});
      }
      unless (defined $effect) {
        ## The link type has no effect in this context.
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context',
                           value => $word,
                           level => $self->{level}->{must});
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming',
                         value => $word,
                         level => $self->{level}->{must});
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$word}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate',
                           value => $word,
                           level => $self->{level}->{must});
      } else {
        $self->{has_link_type}->{$word} = 1;
      }
    }

    ## "alternate" is decided after the loop, since it depends on
    ## whether "stylesheet" is also present.
    if (defined $effect and $word ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback link,
  ## which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
489 wakaba 1.20
490     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
491 wakaba 1.1
## URI (or IRI)
##
## Checks the attribute value as an IRI reference and records the
## attribute in |$element_state->{uri_info}| and in
## |$self->{return}->{uri}| (keyed by the attribute value).
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $value = $attr->value;
  ## The level table is the third argument of |check_iri_reference|, as
  ## in the other callers in this file.  It used to sit outside the
  ## closing parenthesis, where it was a no-op expression and was never
  ## passed to the checker.
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  ## The same |uri_info| hash is shared with the returned URI list.
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
508    
## A space separated list of one or more URIs (or IRIs).
##
## Every item is checked as an IRI reference and appended to
## |$self->{return}->{uri}|, typed by the attribute name.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  my %type_of = (ping => 'action',
                 profile => 'namespace',
                 archive => 'resource');
  my $type = $type_of{$attr->name};

  my @items = split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
  my $i = 0; # index of the current item, reported with errors
  for my $value (@items) {
    my $report = sub {
      $self->{onerror}->(value => $value, @_, node => $attr, index => $i);
    };
    Whatpm::URIChecker->check_iri_reference ($value, $report, $self->{level});

    ## TODO: absolute
    my $entry = {node => $attr, type => {$type => 1}};
    push @{$self->{return}->{uri}->{$value} ||= []}, $entry;

    $i++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## ISSUE: What is "space"?
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
537    
## Pattern for an email address of the form |dot-atom@dot-atom|.  It is
## not anchored; callers add |\A| and |\z| as needed.
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/;
  qr/$dot_atom\@$dot_atom/;
};
544    
## Valid global date and time.
##
## Returns a checker that parses the attribute value with the
## |Message::Date| method |parse_$type| and stores the result in
## |$element_state->{date_value}->{attribute name}|.  The stored value
## is the parser's return value if it is true.  Otherwise it is the
## empty string if an error was reported, or |undef| if the only
## errors were of type 'date value not supported'.
my $GetDateTimeAttrChecker = sub ($) {
  my $parse_method = 'parse_' . shift;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    require Message::Date;
    my $fallback; # stays |undef| unless an error is reported
    my $parser = Message::Date->new;
    $parser->{level} = $self->{level};
    $parser->{onerror} = sub {
      my %opt = @_;
      return if $opt{type} eq 'date value not supported';
      $self->{onerror}->(%opt, node => $attr);
      $fallback = '';
    };

    my $parsed = $parser->$parse_method ($attr->value);
    $element_state->{date_value}->{$attr->name} = $parsed || $fallback;
  };
}; # $GetDateTimeAttrChecker
569 wakaba 1.1
## Integer: an optional "-" followed by one or more ASCII digits.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A-?[0-9]+\z/;
  $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                     level => $self->{level}->{must});
}; # $HTMLIntegerAttrChecker
578    
## Returns a checker for a non-negative integer (ASCII digits only).
## |$range_check| is called with the numeric value and must return
## true if the value is in range.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my ($range_check) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;

    ## The range is only checked for syntactically valid values.
    my $error_type
        = $value !~ /\A[0-9]+\z/      ? 'nninteger:syntax error'
        : $range_check->($value + 0) ? undef
        :                              'nninteger:out of range';
    return unless defined $error_type;

    $self->{onerror}->(node => $attr, type => $error_type,
                       level => $self->{level}->{must});
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
596    
## Returns a checker for a floating point number.  |$range_check| is
## called with the numeric value.  When it returns true, the number is
## saved in |$element_state->{number_value}->{attribute name}|.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my ($range_check) = @_;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;

    unless ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
            $value =~ /\A-?\.[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error',
                         level => $self->{level}->{must});
      return;
    }

    my $number = $value + 0;
    unless ($range_check->($number)) {
      $self->{onerror}->(node => $attr, type => 'float:out of range',
                         level => $self->{level}->{must});
      return;
    }

    ## TODO: parse algorithm
    $element_state->{number_value}->{$attr->name} = $number;
  };

  ## TODO: scientific notation
}; # $GetHTMLFloatingPointNumberAttrChecker
620    
## |step| attribute.
my $StepAttrChecker = sub {
  ## NOTE: A valid floating point number (> 0), or ASCII
  ## case-insensitive "any".

  my ($self, $attr) = @_;
  my $value = $attr->value;

  my $is_number = ($value =~ /\A-?[0-9]+(?>\.[0-9]*)?\z/ or
                   $value =~ /\A-?\.[0-9]+\z/);
  my $error_type;
  if ($is_number) {
    $error_type = 'float:out of range' unless $value > 0;
  } elsif ($value !~ /\A[Aa][Nn][Yy]\z/) {
    $error_type = 'float:syntax error';
  }

  if (defined $error_type) {
    $self->{onerror}->(node => $attr, type => $error_type,
                       level => $self->{level}->{must});
  }

  ## TODO: scientific
}; # $StepAttrChecker
643    
## HTML4 %Length; - one or more ASCII digits, optionally followed by
## "%".
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A[0-9]+%?\z/;
  $self->{onerror}->(node => $attr, type => 'length:syntax error',
                     level => $self->{level}->{must});

  ## NOTE: HTML4 definition is too vague - it does not define the syntax
  ## of percentage value at all (!).
}; # $HTMLLengthAttrChecker
656    
## Building blocks for Internet media type syntax.  None of these
## patterns is anchored.
## |$MIMEToken|: one or more characters from the listed ranges.
my $MIMEToken = qr{[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+};
## |$TypeOrSubtype|: a type or subtype name of 1 to 127 characters.
my $TypeOrSubtype = qr/[A-Za-z0-9!#\$&.+^_-]{1,127}/; # RFC 4288
## |$IMTNoParameter|: "type/subtype" without parameters; captures the
## type and the subtype.
my $IMTNoParameter = qr{($TypeOrSubtype)/($TypeOrSubtype)};
660    
## "A valid MIME type, optionally with parameters. [RFC 2046]"
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
##
## The value is split into type, subtype and parameter name/value pairs
## (with quoted strings unescaped and unquoted), and that list is handed
## to |Whatpm::IMTChecker|.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;

  my ($type, $subtype, $param) = $attr->value
      =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#;
  unless (defined $type) {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                       level => $self->{level}->{must});
    return;
  }

  ## Consume the parameters one by one from the front of |$param|.
  my @type = ($type, $subtype);
  while ($param =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
    my ($name, $token, $quoted) = ($1, $2, $3);
    if (defined $token) {
      push @type, $name => $token;
    } else {
      ## Unescape the quoted pairs, then drop the surrounding quotes.
      $quoted =~ s/\\(.)/$1/gs;
      push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
    }
  }

  require Whatpm::IMTChecker;
  my $ic = Whatpm::IMTChecker->new;
  $ic->{level} = $self->{level};
  $ic->check_imt (sub {
    $self->{onerror}->(@_, node => $attr);
  }, @type);
}; # $HTMLIMTAttrChecker
697    
## Language tag, checked as an RFC 3066 language tag by
## |Whatpm::LangTag|.
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  require Whatpm::LangTag;
  my $report = sub {
    $self->{onerror}->(@_, node => $attr);
  };
  Whatpm::LangTag->check_rfc3066_language_tag
      ($attr->value, $report, $self->{level});
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
711    
## "A valid media query [MQ]"
##
## The value itself is not checked; a 'media query' report is always
## raised at the |uncertain| level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr,
                type => 'media query',
                level => $self->{level}->{uncertain});
  $self->{onerror}->(@report);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
720    
## Event handler attribute.  The value itself is not checked; an
## 'event handler' report is always raised at the |uncertain| level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr,
                type => 'event handler',
                level => $self->{level}->{uncertain});
  $self->{onerror}->(@report);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
732    
## |form| attribute.  The value is queued in |$self->{idref}| as a
## reference to a |form| element.
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: MUST be the ID of a |form| element.

  my $entry = ['form', $attr->value => $attr];
  push @{$self->{idref}}, $entry;

  ## ISSUE: <form id=""><input form=""> (empty ID)?
}; # $HTMLFormAttrChecker
743    
## |list| attribute.  The value is queued in |$self->{idref}| as a
## reference to a |datalist| element.
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;

  ## NOTE: MUST be the ID of a |datalist| element.

  my $entry = ['datalist', $attr->value, $attr];
  push @{$self->{idref}}, $entry;

  ## TODO: Warn violation to control-dependent restrictions.  For
  ## example, |<input type=url maxlength=10 list=a> <datalist
  ## id=a><option value=nonurlandtoolong></datalist>| should be
  ## warned.
}; # $ListAttrChecker
756    
## |pattern| attribute.  The value is handed to |$self->{onsubdoc}| as
## a subdocument of type |text/x-regexp-js|.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;
  my $subdoc = {
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-regexp-js',
    is_char_string => 1,
  };
  $self->{onsubdoc}->($subdoc);

  ## ISSUE: "value must match the Pattern production of ECMA 262's
  ## grammar" - no additional constraints (e.g. {n,m} then n>=m).

  ## TODO: Warn if @value does not match @pattern.
}; # $PatternAttrChecker
769    
my $AcceptAttrChecker = sub {
  my ($checker, $accept_attr) = @_;

  ## Tokens are matched ASCII case-insensitively, so the whole value
  ## is folded to lowercase before it is split.
  (my $folded = $accept_attr->value) =~ tr/A-Z/a-z/;

  ## The negative limit makes |split| keep trailing empty fields.  An
  ## empty attribute value is treated as one empty token.
  my @tokens = length ($folded) ? split (/,/, $folded, -1) : ('');

  my $is_wildcard = {'audio/*' => 1, 'video/*' => 1, 'image/*' => 1};
  my %seen;
  TOKEN: for my $token (@tokens) {
    if ($seen{$token}) {
      ## A repeated token is reported once per repetition and is not
      ## examined any further.
      $checker->{onerror}->(node => $accept_attr,
                            type => 'duplicate token',
                            value => $token,
                            level => $checker->{level}->{must});
      next TOKEN;
    }
    $seen{$token} = 1;

    if ($is_wildcard->{$token}) {
      ## The three wildcard forms need no further checking.
    } elsif ($token =~ m[\A$IMTNoParameter\z]) {
      ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
      ## define its own syntax citing RFC 4288.

      ## NOTE: Parameters not allowed.  The first two groups captured
      ## by the match above are passed on to the media type checker,
      ## whose errors are re-reported against this attribute.
      require Whatpm::IMTChecker;
      my $imt_checker = Whatpm::IMTChecker->new;
      $imt_checker->{level} = $checker->{level};
      $imt_checker->check_imt (sub {
        $checker->{onerror}->(@_, node => $accept_attr);
      }, $1, $2);
    } else {
      $checker->{onerror}->(node => $accept_attr,
                            type => 'IMTnp:syntax error', ## TODOC: type
                            value => $token,
                            level => $checker->{level}->{must});
    }
  } # TOKEN
}; # $AcceptAttrChecker
808    
my $FormControlNameAttrChecker = sub {
  my ($checker, $name_attr) = @_;

  ## The only requirement checked is that the name is not the empty
  ## string.
  my $control_name = $name_attr->value;
  if (not length $control_name) {
    $checker->{onerror}->(node => $name_attr,
                          type => 'empty control name', ## TODOC: type
                          level => $checker->{level}->{must});
  }

  ## NOTE: No uniqueness constraint.
}; # $FormControlNameAttrChecker
820    
my $AutofocusAttrChecker = sub {
  my ($checker, $autofocus_attr) = @_;

  ## The value itself is checked as a boolean attribute first.
  $GetHTMLBooleanAttrChecker->('autofocus')->(@_);

  ## |has_autofocus| is set on the checker by the first |autofocus|
  ## attribute seen; any later one is reported as a duplicate.
  $checker->{onerror}->(node => $autofocus_attr,
                        type => 'duplicate autofocus', ## TODOC: type
                        level => $checker->{level}->{must})
      if $checker->{has_autofocus};
  $checker->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
833    
my $HTMLUsemapAttrChecker = sub {
  my ($checker, $usemap_attr) = @_;

  ## MUST be a valid hash-name reference to a |map| element.
  my $reference = $usemap_attr->value;
  if (substr ($reference, 0, 1) eq '#') {
    ## NOTE: |usemap="#"| is conforming, though it identifies no |map|
    ## element according to the "rules for parsing a hash-name
    ## reference" algorithm.  The document is non-conforming anyway,
    ## since |<map name="">| (empty name) is non-conforming.

    ## The name without its leading "#" is recorded on the checker's
    ## |usemap| list together with the attribute node.
    my $map_name = substr ($reference, 1);
    push @{$checker->{usemap}}, [$map_name => $usemap_attr];
  } else {
    $checker->{onerror}->(node => $usemap_attr,
                          type => 'hashref:syntax error',
                          level => $checker->{level}->{must});
  }

  ## NOTE: Space characters in hash-name references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported
}; # $HTMLUsemapAttrChecker
851    
852 wakaba 1.76 ## Valid browsing context name
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($checker, $name_attr) = @_;

  my $name = $name_attr->value;
  unless (length $name) {
    ## The empty string is reported as an empty window name.
    $checker->{onerror}->(node => $name_attr, type => 'window name:empty',
                          level => $checker->{level}->{must});
  } elsif (substr ($name, 0, 1) eq '_') {
    ## Any name starting with an underscore is reported as reserved.
    $checker->{onerror}->(node => $name_attr,
                          type => 'window name:reserved',
                          level => $checker->{level}->{must},
                          value => $name);
  }
}; # $HTMLBrowsingContextNameAttrChecker
867    
868     ## Valid browsing context name or keyword
my $HTMLTargetAttrChecker = sub {
  ## Checks a "valid browsing context name or keyword" attribute value.
  ##
  ##   $self - the checker; |onerror| and |level| are used.
  ##   $attr - the attribute node whose |value| is checked.
  ##
  ## A value starting with "_" must be one of the keywords |_blank|,
  ## |_self|, |_parent|, or |_top|; otherwise it is reported as a
  ## reserved window name.  The empty string is reported as an empty
  ## window name.  Any other value is accepted.
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    ## The keywords are matched ASCII case-insensitively.  |tr| is used
    ## rather than |lc| because |lc| also folds non-ASCII characters
    ## (e.g. U+212A KELVIN SIGN becomes "k"), which would let
    ## |_blan\x{212A}| pass as |_blank|.  This is also how the other
    ## ASCII case-insensitive comparisons in this file are done.
    $value =~ tr/A-Z/a-z/;
    my %is_keyword = (_blank => 1, _self => 1, _parent => 1, _top => 1);
    unless ($is_keyword{$value}) {
      ## NOTE: The reported |value| is the case-folded one.
      $self->{onerror}->(node => $attr,
                         type => 'window name:reserved',
                         level => $self->{level}->{must},
                         value => $value);
    }
  } elsif (length $value) {
    ## An ordinary browsing context name; nothing more to check.
  } else {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  }
}; # $HTMLTargetAttrChecker
889    
my $HTMLSelectorsAttrChecker = sub {
  my ($checker, $selectors_attr) = @_;

  ## ISSUE: Namespace resolution?

  my $selectors = $selectors_attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  ## Pseudo-class and pseudo-element names enabled on the parser.
  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  my @pseudo_elements = qw/
    after before first-letter first-line
  /;
  @{$parser->{pseudo_class}}{@pseudo_classes} = (1) x @pseudo_classes;
  @{$parser->{pseudo_element}}{@pseudo_elements} = (1) x @pseudo_elements;

  ## The parser shares the checker's level table, and its errors are
  ## forwarded with this attribute attached as the node.
  $parser->{level} = $checker->{level};
  $parser->{onerror} = sub {
    $checker->{onerror}->(@_, node => $selectors_attr);
  };
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
917    
my $HTMLAccesskeyAttrChecker = sub {
  my ($checker, $accesskey_attr) = @_;

  ## NOTE: "character" or |%Character;| in HTML4.  The value has to
  ## consist of exactly one character.

  my $key = $accesskey_attr->value;
  unless (length ($key) == 1) {
    $checker->{onerror}->(node => $accesskey_attr,
                          type => 'char:syntax error',
                          level => $checker->{level}->{html4_fact});
  }

  ## NOTE: "Note. Authors should consider the input method of the expected
  ## reader when specifying an accesskey." [HTML4]  This is hard to implement,
  ## since it depends on keyboard and so on.
  ## NOTE: "We recommend that authors include the access key in label text
  ## or wherever the access key is to apply." [HTML4] (informative)
}; # $HTMLAccesskeyAttrChecker
935    
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($charset_value, $self, $attr, $ascii_compat) = @_;

  ## NOTE: This code is used for |charset=""| attributes, |charset=|
  ## portion of the |content=""| attributes, and |accept-charset=""|
  ## attributes.
  ##
  ## Arguments: the charset name to check, the checker, the attribute
  ## node errors are reported against, and an optional flag requesting
  ## the additional ASCII-compatibility check.  Returns the list
  ## (charset information found in |$IANACharset| if any, the
  ## lowercased charset name).

  ## NOTE: Though the case-sensitivility of |charset| attribute value
  ## is not explicitly spelled in the HTML5 spec, the Character Set
  ## registry of IANA, which is referenced from HTML5 spec, says that
  ## charset name is case-insensitive.
  $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

  require Message::Charset::Info;
  my $charset = $Message::Charset::Info::IANACharset->{$charset_value};

  ## All errors below have the same shape; only the error type and the
  ## key into the level table differ.
  my $report = sub {
    my ($type, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $charset_value,
                       level => $self->{level}->{$level_name});
  };

  ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
  ## Syntactically valid and registered?  What about x-charset names?
  $report->('charset:syntax error', 'must')
      unless Message::Charset::Info::is_syntactically_valid_iana_charset_name
          ($charset_value);

  if ($charset) {
    ## ISSUE: What is "the preferred name for that encoding" (for a charset
    ## with no "preferred MIME name" label)?
    my $name_status = $charset->{iana_names}->{$charset_value} || 0;

    my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
    unless (($name_status & $preferred) == $preferred) {
      $report->('charset:not preferred', 'must');
    }

    my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();
    unless (($name_status & $registered) == $registered) {
      $report->($charset_value =~ /^x-/
                    ? 'charset:private' : 'charset:not registered',
                'good');
    }

    if ($ascii_compat) {
      unless ($charset->{category} &
              Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ()) {
        $report->('charset:not ascii compat', 'must');
      }
    }

    ## TODO: non-preferred-name error for following cases:
  } else {
    ## The name is not known to |$IANACharset|: either a private
    ## (|x-|) name or an unregistered one.

    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    $report->($charset_value =~ /^x-/
                  ? 'charset:private' : 'charset:not registered',
              'good');
  }

  return ($charset, $charset_value);
}; # $HTMLCharsetChecker
1024    
1025 wakaba 1.129 ## NOTE: "An ordered set of space-separated tokens" where "each token
1026     ## MUST be the preferred name of an ASCII-compatible character
1027     ## encoding".
my $HTMLCharsetsAttrChecker = sub {
  my ($checker, $charsets_attr) = @_;

  ## ISSUE: "ordered set of space-separated tokens" is not defined.

  ## The value is split on runs of HTML space characters; an empty
  ## leading field, if any, is dropped.

  ## ISSUE: Uniqueness is not enforced.

  ## Each token is checked as a charset name with the
  ## ASCII-compatibility check requested (the trailing true argument).
  for my $charset_name (grep {length $_}
                        split /[\x09\x0A\x0C\x0D\x20]+/,
                            $charsets_attr->value) {
    $HTMLCharsetChecker->($charset_name, $checker, $charsets_attr, 1);
  }

  ## ISSUE: Shift_JIS is ASCII-compatible?  What about ISO-2022-JP?
}; # $HTMLCharsetsAttrChecker
1043    
my $HTMLColorAttrChecker = sub {
  my ($checker, $color_attr) = @_;

  ## NOTE: HTML4 "color" or |%Color;|

  ## Accepted, case-insensitively: "#" followed by one or more
  ## hexadecimal digits, or one of the sixteen color names listed in
  ## the pattern.
  my $color = $color_attr->value;
  unless ($color =~ /\A(?>#[0-9A-F]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/i) {
    $checker->{onerror}->(node => $color_attr, type => 'color:syntax error',
                          level => $checker->{level}->{html4_fact});
  }

  ## TODO: HTML4 has some guideline on usage of color.
}; # $HTMLColorAttrChecker
1058    
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($self, $attr) = @_;

  ## Shared by the |ref| and |template| attributes.  The value is
  ## first checked as a URI.
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $attr->name;

  ## |ref| is only allowed together with a |template| attribute.
  if ($attr_name eq 'ref' and
      not $attr->owner_element->has_attribute_ns (undef, 'template')) {
    $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                       level => $self->{level}->{must});
  }

  ## Resolve the value against the document URI, then compare the
  ## result with its fragment removed against the document URI to
  ## find out whether the reference points into this document.
  require Message::URL;
  my $doc = $attr->owner_document;
  my $doc_uri = $doc->document_uri;
  my $uri = Message::URL->new_abs ($attr->value, $doc_uri);
  my $no_frag_uri = $uri->clone;
  $no_frag_uri->uri_fragment (undef);
  my $is_same_document = defined $doc_uri
      ? $doc_uri eq $no_frag_uri
      : $no_frag_uri eq '';

  if ($is_same_document) {
    my $fragid = $uri->uri_fragment;
    if (defined $fragid) {
      ## The fragment identifier is recorded on the list named after
      ## the attribute (|ref| or |template|).
      push @{$self->{$attr_name}}, [$fragid => $attr];
    } elsif ($attr_name eq 'template') {
      ## A |template| reference without a fragment designates the
      ## document itself, whose root element then has to be an HTML
      ## |datatemplate| element.
      my $docel = $doc->document_element;
      my $nsuri = $docel ? $docel->namespace_uri : undef;
      my $root_is_datatemplate
          = (defined $nsuri and $nsuri eq $HTML_NS and
             $docel->manakai_local_name eq 'datatemplate');
      unless ($root_is_datatemplate) {
        $self->{onerror}->(node => $attr, type => 'template:not template',
                           level => $self->{level}->{must});
      }
    }
  } else {
    ## TODO: An external document is referenced.
    ## The document MUST be an HTML or XML document.
    ## If there is a fragment identifier, it MUST point a part of the doc.
    ## If the attribute is |template|, the pointed part MUST be a
    ## |datatemplate| element.
    ## If no fragment identifier is specified, the root element MUST be
    ## a |datatemplate| element when the attribute is |template|.
  }
}; # $HTMLRefOrTemplateAttrChecker
1111    
my $HTMLRepeatIndexAttrChecker = sub {
  ## Checker shared by the |repeat-min|, |repeat-max|, and
  ## |repeat-start| attributes.
  ##
  ##   $self - the checker; |onerror| and |level| are used.
  ##   $attr - the attribute node to check.
  ##
  ## The remaining arguments are passed through unchanged to the
  ## non-negative integer checker.
  my ($self, $attr) = @_;

  ## A namespaced form of the attribute is rejected when the owner
  ## element is itself in the HTML namespace.
  ##
  ## The test used to read |defined $oe_nsuri or $oe_nsuri eq
  ## $HTML_NS|, which was true for every element having any namespace
  ## and compared |undef| against $HTML_NS otherwise.
  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  ## The value is checked as a non-negative integer; the callback
  ## passed to the factory always returns true.
  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1126    
1127 wakaba 1.1 my $HTMLAttrChecker = {
1128 wakaba 1.58 ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]
id => sub {
  my ($self, $attr, $item, $element_state) = @_;

  my $id = $attr->value;
  if (length ($id) == 0) {
    ## NOTE: MUST contain at least one character
    $self->{onerror}->(node => $attr, type => 'empty attribute value',
                       level => $self->{level}->{must});
  } else {
    ## |$self->{id}| maps each ID seen so far to the list of
    ## attribute nodes carrying it.  Only the first occurrence sets
    ## the recorded ID type, taken from the element state.
    if ($self->{id}->{$id}) {
      $self->{onerror}->(node => $attr, type => 'duplicate ID',
                         level => $self->{level}->{must});
      push @{$self->{id}->{$id}}, $attr;
    } else {
      $self->{id}->{$id} = [$attr];
      $self->{id_type}->{$id} = $element_state->{id_type} || '';
    }

    ## An ID must not contain any HTML space character.
    $self->{onerror}->(node => $attr, type => 'space in ID',
                       level => $self->{level}->{must})
        if $id =~ /[\x09\x0A\x0C\x0D\x20]/;
  }
},
title => sub {}, ## NOTE: No conformance criteria
lang => sub {
  my ($self, $attr) = @_;

  ## The empty string is accepted as is; any other value is checked
  ## as an RFC 3066 language tag, with errors reported against this
  ## attribute.
  my $tag = $attr->value;
  if ($tag ne '') {
    require Whatpm::LangTag;
    Whatpm::LangTag->check_rfc3066_language_tag ($tag, sub {
      $self->{onerror}->(@_, node => $attr);
    }, $self->{level});
  }
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: test data

  ## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
  ## non-conforming.  Such errors are detected by the checkers of
  ## |{}xml:lang| and |{xml}:lang| attributes.
},
dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
class => sub {
  my ($self, $attr) = @_;

  ## NOTE: "Unordered set of unique space-separated tokens".

  ## The first occurrence of each token is recorded under
  ## |$self->{return}->{class}|; every repetition within the same
  ## attribute is reported instead.
  my %seen;
  for my $token (grep {length $_}
                 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    if ($seen{$token}++) {
      $self->{onerror}->(node => $attr, type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
    } else {
      push @{$self->{return}->{class}->{$token} ||= []}, $attr;
    }
  }
},
contenteditable => $GetHTMLEnumeratedAttrChecker->({
  true => 1, false => 1, '' => 1,
}),
contextmenu => sub {
  my ($self, $attr) = @_;

  ## The reference is only appended to the checker's |idref| list
  ## here, as an [expected element type, ID, attribute node] triple.
  my $menu_id = $attr->value;
  my @reference = (menu => $menu_id, $attr);
  push @{$self->{idref}}, \@reference;

  ## ISSUE: "The value must be the ID of a menu element in the DOM."
  ## What is "in the DOM"?  A menu Element node that is not part
  ## of the Document tree is in the DOM?  A menu Element node that
  ## belong to another Document tree is in the DOM?
},
hidden => $GetHTMLBooleanAttrChecker->('hidden'),
irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
ref => $HTMLRefOrTemplateAttrChecker,
registrationmark => sub {
  my ($self, $attr, $item, $element_state) = @_;

  ## NOTE: Any value is conforming.

  ## The attribute is rejected when the |in_rule| flag is not set.
  ## When the flag is set, it is rejected on an HTML |nest| element,
  ## and on an HTML |rule| element whose element state has no true
  ## |in_rule_original|.
  my $not_allowed = 0;
  if (not $self->{flag}->{in_rule}) {
    $not_allowed = 1;
  } else {
    my $el = $attr->owner_element;
    my $ln = $el->manakai_local_name;
    my $is_candidate = ($ln eq 'nest' or
                        ($ln eq 'rule' and
                         not $element_state->{in_rule_original}));
    if ($is_candidate) {
      my $nsuri = $el->namespace_uri;
      $not_allowed = 1 if defined $nsuri and $nsuri eq $HTML_NS;
    }
  }

  if ($not_allowed) {
    $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                       level => $self->{level}->{must});
  }
},
repeat => sub {
  ## Checks the |repeat| attribute: where a namespaced form of the
  ## attribute may appear, and the syntax of its value.
  ##
  ##   $self - the checker; |onerror| and |level| are used.
  ##   $attr - the attribute node to check.
  my ($self, $attr) = @_;

  ## A namespaced form of the attribute is rejected when the owner
  ## element is itself in the HTML namespace.
  ##
  ## The test used to read |defined $oe_nsuri or $oe_nsuri eq
  ## $HTML_NS|, which was true for every element having any namespace
  ## and compared |undef| against $HTML_NS otherwise.
  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  ## The value is either the keyword |template| or an optionally
  ## negative decimal integer.
  my $value = $attr->value;
  if ($value eq 'template') {
    #
  } elsif ($value =~ /\A-?[0-9]+\z/) {
    #
  } else {
    $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                       level => $self->{level}->{must});
  }

  ## ISSUE: "Repetition templates may occur anywhere."  Does that mean
  ## that the attribute MAY be specified to any element, or that the
  ## element with that attribute (i.e. a repetition template) can be
  ## inserted anywhere in a document tree?
},
'repeat-min' => $HTMLRepeatIndexAttrChecker,
'repeat-max' => $HTMLRepeatIndexAttrChecker,
'repeat-start' => $HTMLRepeatIndexAttrChecker,
'repeat-template' => sub {
  ## Checks where a namespaced form of the |repeat-template|
  ## attribute may appear.  The value is not checked.
  ##
  ##   $self - the checker; |onerror| and |level| are used.
  ##   $attr - the attribute node to check.
  my ($self, $attr) = @_;

  ## A namespaced form of the attribute is rejected when the owner
  ## element is itself in the HTML namespace.
  ##
  ## The test used to read |defined $oe_nsuri or $oe_nsuri eq
  ## $HTML_NS|, which was true for every element having any namespace
  ## and compared |undef| against $HTML_NS otherwise.
  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  ## ISSUE: This attribute has no conformance requirement.
  ## ISSUE: Repetition blocks MAY have this attribute.  Then, is the
  ## attribute allowed on an element that is not a repetition block?
},
## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]
style => sub {
  my ($self, $attr) = @_;

  ## The attribute value is handed to the |onsubdoc| callback as a
  ## character-string subdocument typed |text/x-css-inline|; the
  ## style sheet is not examined in this subroutine itself.
  my $subdoc = {s => $attr->value,
                container_node => $attr,
                media_type => 'text/x-css-inline',
                is_char_string => 1};
  $self->{onsubdoc}->($subdoc);

  ## NOTE: "... MUST still be comprehensible and usable if those
  ## attributes were removed" is a semantic requirement, it cannot
  ## be tested.
},
tabindex => $HTMLIntegerAttrChecker,
template => $HTMLRefOrTemplateAttrChecker,
'xml:lang' => sub {
  my ($self, $attr) = @_;

  ## The attribute is always reported; the error type and level
  ## depend on whether the owner document is an HTML document.
  my ($type, $level_name) = $attr->owner_document->manakai_is_html
      ## NOTE: This is not an error, but the attribute will be ignored.
      ? ('in HTML:xml:lang', 'info')
      ## TODO: We need to add test for this error.
      : ('in XML:xml:lang', 'html5_no_may');
  $self->{onerror}->(type => $type,
                     level => $self->{level}->{$level_name},
                     node => $attr);

  ## The attribute is only allowed next to a null-namespace |lang|
  ## attribute, and the two values have to match ASCII
  ## case-insensitively.
  my $lang_attr = $attr->owner_element->get_attribute_node_ns
      (undef, 'lang');
  if (not $lang_attr) {
    $self->{onerror}->(type => 'xml:lang not allowed',
                       level => $self->{level}->{must},
                       node => $attr);
    ## TODO: We need to add test for <x {xml}:lang {}xml:lang>.
  } else {
    (my $lang = $lang_attr->value) =~ tr/A-Z/a-z/;
    (my $xml_lang = $attr->value) =~ tr/A-Z/a-z/;
    unless ($lang eq $xml_lang) {
      $self->{onerror}->(type => 'xml:lang ne lang',
                         level => $self->{level}->{must},
                         node => $attr);
    }
  }
},
xmlns => sub {
  my ($self, $attr) = @_;

  ## The value has to be the HTML namespace name.
  my $ns_name = $attr->value;
  if ($ns_name ne $HTML_NS) {
    $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                       level => $self->{level}->{must});
    ## TODO: Should be new "bad namespace" error?
  }

  ## The attribute is reported when the owner document is not an
  ## HTML document.
  if (not $attr->owner_document->manakai_is_html) {
    $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                       level => $self->{level}->{must});
    ## TODO: Test
  }

  ## The value is recorded under |$self->{return}->{uri}| as a
  ## namespace URI.
  ## TODO: Should be resolved?
  my $uses = $self->{return}->{uri}->{$ns_name} ||= [];
  push @$uses, {node => $attr, type => {namespace => 1}};
},
1339 wakaba 1.1 };
1340    
## ISSUE: Shouldn't the same-origin policy be applied to the datatemplate feature?
1342    
## Status flags of the global attributes, keyed by attribute name and
## grouped below by status value.
my %HTMLAttrStatus = (
  class              => FEATURE_HTML5_WD,
  contextmenu        => FEATURE_HTML5_WD,
  dir                => FEATURE_HTML5_WD,
  id                 => FEATURE_HTML5_WD,
  lang               => FEATURE_HTML5_WD,
  style              => FEATURE_HTML5_WD,
  title              => FEATURE_HTML5_WD,
  xmlns              => FEATURE_HTML5_WD,

  contenteditable    => FEATURE_HTML5_DEFAULT,
  hidden             => FEATURE_HTML5_DEFAULT,
  tabindex           => FEATURE_HTML5_DEFAULT,

  draggable          => FEATURE_HTML5_LC,

  irrelevant         => FEATURE_HTML5_DROPPED,

  ref                => FEATURE_HTML5_AT_RISK,
  registrationmark   => FEATURE_HTML5_AT_RISK,
  template           => FEATURE_HTML5_AT_RISK,

  repeat             => FEATURE_WF2,
  'repeat-max'       => FEATURE_WF2,
  'repeat-min'       => FEATURE_WF2,
  'repeat-start'     => FEATURE_WF2,
  'repeat-template'  => FEATURE_WF2,

  role               => 0,
);
1367    
## Status flags of the attributes this table treats as common,
## keyed by attribute name and grouped below by status value.
my %HTMLM12NCommonAttrStatus = (
  ## FEATURE_HTML5_WD | FEATURE_M12N10_REC
  class        => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  dir          => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  id           => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  title        => FEATURE_HTML5_WD | FEATURE_M12N10_REC,

  #style       => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_M12N10_REC,
  style        => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
      FEATURE_M12N10_REC,

  ## FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC (event handlers)
  onclick      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  ondblclick   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmousedown  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmouseup    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmouseover  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmousemove  => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onmouseout   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onkeypress   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onkeydown    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  onkeyup      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,

  ## FEATURE_RDFA_REC
  about        => FEATURE_RDFA_REC,
  content      => FEATURE_RDFA_REC,
  datatype     => FEATURE_RDFA_REC,
  href         => FEATURE_RDFA_REC,
  property     => FEATURE_RDFA_REC,
  rel          => FEATURE_RDFA_REC,
  resource     => FEATURE_RDFA_REC,
  rev          => FEATURE_RDFA_REC,
  typeof       => FEATURE_RDFA_REC,

  ## FEATURE_RDFA_LC_DROPPED
  instanceof   => FEATURE_RDFA_LC_DROPPED,
);
1398    
## Status flags of the attributes this table treats as XHTML2 common
## attributes, keyed by attribute name and grouped as in the original
## listing.
my %XHTML2CommonAttrStatus = (
  ## Core
  class       => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
  id          => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
  #xml:id
  layout      => FEATURE_XHTML2_ED,
  title       => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,

  ## Hypertext
  cite        => FEATURE_XHTML2_ED,
  href        => FEATURE_XHTML2_ED,
  hreflang    => FEATURE_XHTML2_ED,
  hrefmedia   => FEATURE_XHTML2_ED,
  hreftype    => FEATURE_XHTML2_ED,
  nextfocus   => FEATURE_XHTML2_ED,
  prevfocus   => FEATURE_XHTML2_ED,
  target      => FEATURE_XHTML2_ED,
  #xml:base

  ## I18N
  #xml:lang

  ## Bi-directional
  dir         => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,

  ## Edit
  edit        => FEATURE_XHTML2_ED,
  datetime    => FEATURE_XHTML2_ED,

  ## Embedding
  encoding    => FEATURE_XHTML2_ED,
  src         => FEATURE_XHTML2_ED,
  srctype     => FEATURE_XHTML2_ED,

  ## Image Map
  usemap      => FEATURE_XHTML2_ED,
  ismap       => FEATURE_XHTML2_ED,
  shape       => FEATURE_XHTML2_ED,
  coords      => FEATURE_XHTML2_ED,

  ## Media
  media       => FEATURE_XHTML2_ED,

  ## Metadata
  about       => FEATURE_XHTML2_ED,
  content     => FEATURE_XHTML2_ED,
  datatype    => FEATURE_XHTML2_ED,
  instanceof  => FEATURE_XHTML2_ED,
  property    => FEATURE_XHTML2_ED,
  rel         => FEATURE_XHTML2_ED,
  resource    => FEATURE_XHTML2_ED,
  rev         => FEATURE_XHTML2_ED,

  ## Role
  role        => FEATURE_XHTML2_ED,

  ## Style
  style       => FEATURE_HTML5_WD | FEATURE_XHTML2_ED, # "strongly discouraged"
);
1458    
## Combination of %HTMLM12NCommonAttrStatus and
## %XHTML2CommonAttrStatus.  Flattening the two tables lets the later
## one win for names found in both, so the entries after them spell
## out the intended combined flags for such names explicitly.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  class       => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  dir         => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  id          => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  title       => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,

  #style      => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  style       => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR |
      FEATURE_XHTML2_ED | FEATURE_M12N10_REC,

  about       => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  content     => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  datatype    => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  property    => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  rel         => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  resource    => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  rev         => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
  instanceof  => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,

  ## NOTE(review): |href| is listed in both source tables, but unlike
  ## the other shared names it is not OR-ed with FEATURE_XHTML2_ED
  ## here - confirm whether that is intentional.
  href        => FEATURE_RDFA_REC,
  typeof      => FEATURE_RDFA_REC,
);
1482    
## Register every event handler content attribute as a global
## attribute: all of them share one checker and one status.
for my $event_attr (qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onstorage onsubmit onunload
/) {
  $HTMLAttrChecker->{$event_attr} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$event_attr} = FEATURE_HTML5_DEFAULT;
}
1494    
## NOTE: Non-standard global attributes in the HTML namespace.
{
  ## Checker and status tables for attributes in the HTML namespace.
  ## The empty-string key holds the default entry of each table.
  my $html_ns_checker = $AttrChecker->{$HTML_NS} ||= {};
  my $html_ns_status = $AttrStatus->{$HTML_NS} ||= {};

  $html_ns_checker->{''} = sub {}; # no syntactical checks
  $html_ns_status->{''} = 0; # disallowed and not part of any standard

  $html_ns_status->{active} = FEATURE_HTML5_DROPPED;
  for my $name (qw/repeat repeat-max repeat-min repeat-start
                   repeat-template/) {
    $html_ns_checker->{$name} = $HTMLAttrChecker->{$name};
    $html_ns_status->{$name} = FEATURE_WF2;
  }

  for my $name (qw/about content datatype property rel resource rev/) {
    $html_ns_status->{$name} = FEATURE_RDFA_REC | FEATURE_XHTML2_ED;
  }
  $html_ns_status->{instanceof}
      = FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED;
  $html_ns_status->{typeof} = FEATURE_RDFA_REC;
  $html_ns_status->{role} = FEATURE_ROLE_LC;

  for my $name (qw/cite coords datetime edit encoding href hreflang
                   hrefmedia hreftype ismap layout media nextfocus
                   prevfocus shape src srctype style target usemap/) {
    $html_ns_status->{$name} = FEATURE_XHTML2_ED;
  }
  for my $name (qw/class dir id title/) {
    $html_ns_status->{$name} = FEATURE_M12N11_LC | FEATURE_XHTML2_ED;
  }
  for my $name (qw/onclick ondblclick onmousedown onmouseup onmouseover
                   onmousemove onmouseout onkeypress onkeydown onkeyup/) {
    $html_ns_status->{$name} = FEATURE_M12N11_LC;
  }
}
1523    
## Checker for |data-*| attributes.  It checks nothing.
my $HTMLDatasetAttrChecker = sub {
  ## NOTE: "Authors should ... when the attributes are ignored and
  ## any associated CSS dropped, the page is still usable." (semantic
  ## constraint.)
  return;
}; # $HTMLDatasetAttrChecker

## Status shared by all |data-*| attributes.
my $HTMLDatasetAttrStatus = FEATURE_HTML5_WD;
1531 wakaba 1.73
## Returns a |check_attrs| subroutine for an element.  The two
## arguments are hash references keyed by attribute local name: the
## element-specific attribute checkers and the element-specific
## attribute statuses.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;

  return sub {
    my ($self, $item, $element_state) = @_;

    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      my $attr_ns = defined $ns ? $ns : '';
      my $attr_ln = $attr->manakai_local_name;
      my $in_null_ns = $attr_ns eq '';

      ## For null-namespace attributes, an all-lowercase |data-*|
      ## name selects the dataset checker and status; any other name
      ## is looked up in the element-specific table and then in the
      ## global attribute table.
      my ($checker, $status);
      if ($in_null_ns) {
        my $is_dataset_attr
            = ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
               $attr_ln !~ /[A-Z]/);
        ($checker, $status) = $is_dataset_attr
            ? ($HTMLDatasetAttrChecker, $HTMLDatasetAttrStatus)
            : ($element_specific_checker->{$attr_ln}
                   || $HTMLAttrChecker->{$attr_ln},
               $element_specific_status->{$attr_ln});
      }

      ## Fall back to the per-namespace tables: first the entry for
      ## the local name, then the namespace's default (empty-string)
      ## entry.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED
          if length $attr_ns and not defined $status;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif (not $in_null_ns or $element_specific_status->{$attr_ln}) {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      } else {
        ## A null-namespace attribute with neither a checker nor an
        ## element-specific status: nothing is reported here.
      }

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1573    
## Base callback set for HTML elements: everything from
## %Whatpm::ContentChecker::AnyChecker, with |check_start| and
## |check_attrs| replaced by the entries below.
1574 wakaba 1.40 my %HTMLChecker = (
1575     %Whatpm::ContentChecker::AnyChecker,
1576 wakaba 1.79 check_start => sub {
1577     my ($self, $item, $element_state) = @_;
1578    
## Mark the |template| and |ref| entries of uri_info as type "resource".
1579     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
1580     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
1581     },
## No element-specific checkers; statuses come from %HTMLAttrStatus.
1582 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
1583 wakaba 1.40 );
1584    
## Callback set for elements whose content must be empty.  Extends
## %HTMLChecker with child-element and child-text checks.
1585     my %HTMLEmptyChecker = (
1586     %HTMLChecker,
## Child elements: error if listed in minus_elements and classified as
## interactive content by $IsInHTMLInteractiveContent; silently accepted
## if listed in plus_elements; otherwise 'element not allowed:empty'.
1587     check_child_element => sub {
1588     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1589     $child_is_transparent, $element_state) = @_;
1590 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1591     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1592 wakaba 1.40 $self->{onerror}->(node => $child_el,
1593     type => 'element not allowed:minus',
1594 wakaba 1.104 level => $self->{level}->{must});
1595 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1596     #
1597     } else {
1598     $self->{onerror}->(node => $child_el,
1599     type => 'element not allowed:empty',
1600 wakaba 1.104 level => $self->{level}->{must});
1601 wakaba 1.40 }
1602     },
## Child text: reported only when the caller flags it as significant.
1603     check_child_text => sub {
1604     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1605     if ($has_significant) {
1606     $self->{onerror}->(node => $child_node,
1607     type => 'character not allowed:empty',
1608 wakaba 1.104 level => $self->{level}->{must});
1609 wakaba 1.40 }
1610     },
1611     );
1612    
## Callback set for elements that accept text but no child elements.
## Extends %HTMLChecker; |check_child_text| is not overridden here.
1613     my %HTMLTextChecker = (
1614     %HTMLChecker,
## Child elements: same minus/plus handling as the other checker sets;
## any other child element yields 'element not allowed:text'.
1615     check_child_element => sub {
1616     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1617     $child_is_transparent, $element_state) = @_;
1618 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1619     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1620 wakaba 1.40 $self->{onerror}->(node => $child_el,
1621     type => 'element not allowed:minus',
1622 wakaba 1.104 level => $self->{level}->{must});
1623 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1624     #
1625     } else {
1626 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed:text',
1627     level => $self->{level}->{must});
1628 wakaba 1.40 }
1629     },
1630     );
1631    
## Callback set for elements whose content model is flow content.
## Extends %HTMLChecker.  Tracks |has_non_style| in $element_state so
## that a |style| child can be rejected once other content has been seen.
1632 wakaba 1.72 my %HTMLFlowContentChecker = (
1633 wakaba 1.40 %HTMLChecker,
1634     check_child_element => sub {
1635     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1636     $child_is_transparent, $element_state) = @_;
1637 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1638     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1639 wakaba 1.40 $self->{onerror}->(node => $child_el,
1640     type => 'element not allowed:minus',
1641 wakaba 1.104 level => $self->{level}->{must});
1642 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1643     #
1644     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
## An HTML |style| child is an error if non-style content was already
## recorded or if it has no null-namespace |scoped| attribute.
1645     if ($element_state->{has_non_style} or
1646     not $child_el->has_attribute_ns (undef, 'scoped')) {
1647 wakaba 1.104 $self->{onerror}->(node => $child_el,
1648 wakaba 1.72 type => 'element not allowed:flow style',
1649 wakaba 1.104 level => $self->{level}->{must});
1650 wakaba 1.40 }
1651 wakaba 1.72 } elsif ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
## Transparent children do not count as non-style content here.
1652 wakaba 1.43 $element_state->{has_non_style} = 1 unless $child_is_transparent;
1653 wakaba 1.40 } else {
1654     $element_state->{has_non_style} = 1;
1655 wakaba 1.104 $self->{onerror}->(node => $child_el,
1656 wakaba 1.72 type => 'element not allowed:flow',
1657 wakaba 1.104 level => $self->{level}->{must})
1658 wakaba 1.40 }
1659     },
## Significant text counts as non-style content.
1660     check_child_text => sub {
1661     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1662     if ($has_significant) {
1663     $element_state->{has_non_style} = 1;
1664     }
1665     },
## At element end: propagate |has_significant| to the real parent's state;
## otherwise, unless the item is transparent, report 'no significant
## content' at the "should" level.
## NOTE(review): |has_significant| is not set anywhere in this hash;
## presumably the caller maintains it -- confirm.
1666     check_end => sub {
1667     my ($self, $item, $element_state) = @_;
1668 wakaba 1.95 ## NOTE: A modified copy of the code below is in |datagrid| checker.
1669 wakaba 1.40 if ($element_state->{has_significant}) {
1670 wakaba 1.46 $item->{real_parent_state}->{has_significant} = 1;
1671 wakaba 1.40 } elsif ($item->{transparent}) {
1672     #
1673     } else {
1674     $self->{onerror}->(node => $item->{node},
1675 wakaba 1.104 level => $self->{level}->{should},
1676 wakaba 1.40 type => 'no significant content');
1677     }
1678     },
1679     );
1680    
## Callback set for elements whose content model is phrasing content.
## Extends %HTMLChecker and shares |check_end| with the flow checker.
1681     my %HTMLPhrasingContentChecker = (
1682     %HTMLChecker,
## Child elements: minus/plus handling first; then anything listed in
## $HTMLPhrasingContent is accepted; everything else is reported as
## 'element not allowed:phrasing'.
1683     check_child_element => sub {
1684     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1685     $child_is_transparent, $element_state) = @_;
1686 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1687     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1688 wakaba 1.40 $self->{onerror}->(node => $child_el,
1689     type => 'element not allowed:minus',
1690 wakaba 1.104 level => $self->{level}->{must});
1691 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1692     #
1693     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
1694     #
1695     } else {
1696     $self->{onerror}->(node => $child_el,
1697     type => 'element not allowed:phrasing',
1698 wakaba 1.104 level => $self->{level}->{must});
1699 wakaba 1.40 }
1700     },
1701 wakaba 1.72 check_end => $HTMLFlowContentChecker{check_end},
1702 wakaba 1.40 ## NOTE: The definition for |li| assumes that the only differences
1703 wakaba 1.72 ## between flow and phrasing content checkers are |check_child_element|
1704 wakaba 1.40 ## and |check_child_text|.
1705     );
1706    
## Transparent elements start from a shallow copy of the flow checker set.
1707 wakaba 1.72 my %HTMLTransparentChecker = %HTMLFlowContentChecker;
1708 wakaba 1.40 ## ISSUE: Significant content rule should be applied to transparent element
1709 wakaba 1.46 ## with parent?
1710 wakaba 1.40
## Package variables holding the element definition tables.
1711 wakaba 1.1 our $Element;
1712     our $ElementDefault;
1713    
## Entry under the '' local name in the HTML namespace: plain %HTMLChecker.
## NOTE(review): presumably used as the fallback for HTML-namespace
## elements without their own entry -- confirm against the lookup code.
1714     $Element->{$HTML_NS}->{''} = {
1715 wakaba 1.40 %HTMLChecker,
1716 wakaba 1.1 };
1717    
## Definition of the |html| element.
##
## Child order is tracked with |$element_state->{phase}|, which moves
## 'before head' -> 'after head' -> 'after body'.  Any other phase value
## makes the callbacks die.
1718     $Element->{$HTML_NS}->{html} = {
1719 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
1720 wakaba 1.1 is_root => 1,
1721 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
1722 wakaba 1.16 manifest => $HTMLURIAttrChecker,
1723 wakaba 1.67 version => sub {
1724     ## NOTE: According to HTML4 prose, this is a "cdata" attribute.
1725     ## Though DTDs of various versions of HTML define the attribute
1726     ## as |#FIXED|, this conformance checker does no check for
1727     ## the attribute value, since what kind of check should be done
1728     ## is unknown.
1729     },
1730 wakaba 1.49 }, {
1731     %HTMLAttrStatus,
1732 wakaba 1.82 %XHTML2CommonAttrStatus,
1733 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
1734     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
1735     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
1736     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
1737     manifest => FEATURE_HTML5_WD,
1738 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
1739 wakaba 1.82 version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
1740 wakaba 1.1 }),
## Initialise the phase and mark the |manifest|, |template| and |ref|
## uri_info entries as type "resource".
1741 wakaba 1.40 check_start => sub {
1742     my ($self, $item, $element_state) = @_;
1743     $element_state->{phase} = 'before head';
1744 wakaba 1.79
1745 wakaba 1.66 $element_state->{uri_info}->{manifest}->{type}->{resource} = 1;
1746 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
1747     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
1748 wakaba 1.40 },
1749     check_child_element => sub {
1750     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1751     $child_is_transparent, $element_state) = @_;
1752 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1753     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1754 wakaba 1.40 $self->{onerror}->(node => $child_el,
1755     type => 'element not allowed:minus',
1756 wakaba 1.104 level => $self->{level}->{must});
1757 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1758     #
1759     } elsif ($element_state->{phase} eq 'before head') {
## Expect |head|.  A |body| seen first is reported as a missing |head|
## and the phase jumps straight to 'after body'.
1760     if ($child_nsuri eq $HTML_NS and $child_ln eq 'head') {
1761     $element_state->{phase} = 'after head';
1762     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
1763     $self->{onerror}->(node => $child_el,
1764 wakaba 1.104 type => 'ps element missing',
1765     text => 'head',
1766     level => $self->{level}->{must});
1767 wakaba 1.40 $element_state->{phase} = 'after body';
1768     } else {
1769     $self->{onerror}->(node => $child_el,
1770 wakaba 1.104 type => 'element not allowed',
1771     level => $self->{level}->{must});
1772 wakaba 1.40 }
1773     } elsif ($element_state->{phase} eq 'after head') {
## Only |body| is accepted after |head|.
1774     if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
1775     $element_state->{phase} = 'after body';
1776     } else {
1777     $self->{onerror}->(node => $child_el,
1778 wakaba 1.104 type => 'element not allowed',
1779     level => $self->{level}->{must});
1780 wakaba 1.40 }
1781     } elsif ($element_state->{phase} eq 'after body') {
## Nothing is accepted after |body|.
1782     $self->{onerror}->(node => $child_el,
1783 wakaba 1.104 type => 'element not allowed',
1784     level => $self->{level}->{must});
1785 wakaba 1.40 } else {
1786     die "check_child_element: Bad |html| phase: $element_state->{phase}";
1787     }
1788     },
## Significant text directly inside |html| is an error.
1789     check_child_text => sub {
1790     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1791     if ($has_significant) {
1792     $self->{onerror}->(node => $child_node,
1793 wakaba 1.104 type => 'character not allowed',
1794     level => $self->{level}->{must});
1795 wakaba 1.40 }
1796     },
## Report whichever of |head| / |body| was never reached, then delegate
## to the shared |check_end| from %HTMLChecker.
1797     check_end => sub {
1798     my ($self, $item, $element_state) = @_;
1799     if ($element_state->{phase} eq 'after body') {
1800     #
1801     } elsif ($element_state->{phase} eq 'before head') {
1802     $self->{onerror}->(node => $item->{node},
1803 wakaba 1.104 type => 'child element missing',
1804     text => 'head',
1805     level => $self->{level}->{must});
1806 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1807 wakaba 1.104 type => 'child element missing',
1808     text => 'body',
1809     level => $self->{level}->{must});
1810 wakaba 1.40 } elsif ($element_state->{phase} eq 'after head') {
1811     $self->{onerror}->(node => $item->{node},
1812 wakaba 1.104 type => 'child element missing',
1813     text => 'body',
1814     level => $self->{level}->{must});
1815 wakaba 1.40 } else {
1816     die "check_end: Bad |html| phase: $element_state->{phase}";
1817     }
1818 wakaba 1.1
1819 wakaba 1.40 $HTMLChecker{check_end}->(@_);
1820     },
1821     };
1822 wakaba 1.25
## Definition of the |head| element.
##
## Content model enforced here: at most one |title| (and at least one,
## checked at element end), no |style| carrying a |scoped| attribute,
## otherwise only elements listed in $HTMLMetadataContent, and no
## significant text.  While the children are being processed,
## |$self->{flag}->{in_head}| is set to 1; the previous value is put back
## in |check_end|.
1823 wakaba 1.40 $Element->{$HTML_NS}->{head} = {
1824 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
1825 wakaba 1.67 check_attrs => $GetHTMLAttrsChecker->({
1826     profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
1827     }, {
1828 wakaba 1.49 %HTMLAttrStatus,
1829 wakaba 1.82 %XHTML2CommonAttrStatus,
1830 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
1831     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
1832     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
1833     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
1834 wakaba 1.49 profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
1835     }),
1836 wakaba 1.40 check_child_element => sub {
1837     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1838     $child_is_transparent, $element_state) = @_;
1839 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
1840     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
1841 wakaba 1.40 $self->{onerror}->(node => $child_el,
1842     type => 'element not allowed:minus',
1843 wakaba 1.104 level => $self->{level}->{must});
1844 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1845     #
1846     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
## Only the first |title| is accepted; later ones are errors.
1847     unless ($element_state->{has_title}) {
1848     $element_state->{has_title} = 1;
1849     } else {
1850     $self->{onerror}->(node => $child_el,
1851     type => 'element not allowed:head title',
1852 wakaba 1.104 level => $self->{level}->{must});
1853 wakaba 1.40 }
1854     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
## A |style| with a |scoped| attribute is not allowed in |head|.
1855     if ($child_el->has_attribute_ns (undef, 'scoped')) {
1856     $self->{onerror}->(node => $child_el,
1857     type => 'element not allowed:head style',
1858 wakaba 1.104 level => $self->{level}->{must});
1859 wakaba 1.1 }
1860 wakaba 1.40 } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
1861     #
1862    
1863     ## NOTE: |meta| is a metadata content. However, strictly speaking,
1864     ## a |meta| element with none of |charset|, |name|,
1865     ## or |http-equiv| attribute is not allowed. It is non-conforming
1866     ## anyway.
1867 wakaba 1.56
1868     ## TODO: |form| MUST be empty and in XML [WF2].
1869 wakaba 1.40 } else {
1870     $self->{onerror}->(node => $child_el,
1871     type => 'element not allowed:metadata',
1872 wakaba 1.104 level => $self->{level}->{must});
1873 wakaba 1.40 }
## Save the previous |in_head| flag only once per |head| element.  This
## callback runs for every child element; saving unconditionally would,
## from the second child on, overwrite the saved value with the 1 set
## below, and |check_end| would then leave the flag stuck at 1.
1874     $element_state->{in_head_original} = $self->{flag}->{in_head} unless exists $element_state->{in_head_original};
1875     $self->{flag}->{in_head} = 1;
1876     },
## Significant text directly inside |head| is an error.
1877     check_child_text => sub {
1878     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1879     if ($has_significant) {
1880 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
1881     level => $self->{level}->{must});
1882 wakaba 1.1 }
1883 wakaba 1.40 },
## Report a missing |title|, put the saved |in_head| flag back, then
## delegate to the shared |check_end| from %HTMLChecker.
1884     check_end => sub {
1885     my ($self, $item, $element_state) = @_;
1886     unless ($element_state->{has_title}) {
1887     $self->{onerror}->(node => $item->{node},
1888 wakaba 1.104 type => 'child element missing',
1889     text => 'title',
1890 wakaba 1.105 level => $self->{level}->{must});
1891 wakaba 1.1 }
1892 wakaba 1.40 $self->{flag}->{in_head} = $element_state->{in_head_original};
1893 wakaba 1.1
1894 wakaba 1.40 $HTMLChecker{check_end}->(@_);
1895 wakaba 1.1 },
1896     };
1897    
## Definition of the |title| element: text-only content (via
## %HTMLTextChecker) with its own status and attribute status table.
1898 wakaba 1.40 $Element->{$HTML_NS}->{title} = {
1899     %HTMLTextChecker,
1900 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
## No element-specific attribute checkers; only statuses are customised.
1901 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, {
1902     %HTMLAttrStatus,
1903 wakaba 1.82 %XHTML2CommonAttrStatus,
1904 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
1905     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
1906     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
1907     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
1908 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
1909 wakaba 1.49 }),
1910 wakaba 1.40 };
1911 wakaba 1.1
## Definition of the |base| element (empty content model).
##
## |check_attrs| performs document-level checks before the per-attribute
## checks: at most one |base| per checker run (|$self->{has_base}|),
## |href| must not follow a URL attribute, |target| must not follow a
## hyperlink element, and at least one of |href| / |target| is required.
1912 wakaba 1.40 $Element->{$HTML_NS}->{base} = {
1913 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
1914 wakaba 1.40 %HTMLEmptyChecker,
1915     check_attrs => sub {
1916     my ($self, $item, $element_state) = @_;
1917 wakaba 1.1
## A second |base| is an error; the first one sets the flag.
1918 wakaba 1.40 if ($self->{has_base}) {
1919     $self->{onerror}->(node => $item->{node},
1920 wakaba 1.104 type => 'element not allowed:base',
1921     level => $self->{level}->{must});
1922 wakaba 1.40 } else {
1923     $self->{has_base} = 1;
1924 wakaba 1.29 }
1925    
1926 wakaba 1.40 my $has_href = $item->{node}->has_attribute_ns (undef, 'href');
1927     my $has_target = $item->{node}->has_attribute_ns (undef, 'target');
1928 wakaba 1.14
## NOTE(review): |has_uri_attr| is not set in this block; presumably the
## URI attribute checkers set it -- confirm.
1929     if ($self->{has_uri_attr} and $has_href) {
1930 wakaba 1.4 ## ISSUE: Are these examples conforming?
1931     ## <head profile="a b c"><base href> (except for |profile|'s
1932     ## non-conformance)
1933     ## <title xml:base="relative"/><base href/> (maybe it should be)
1934     ## <unknown xmlns="relative"/><base href/> (assuming that
1935     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
1936     ## <style>@import 'relative';</style><base href>
1937     ## <script>location.href = 'relative';</script><base href>
1938 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
1939     ## an exception.
1940 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1941 wakaba 1.104 type => 'basehref after URL attribute',
1942     level => $self->{level}->{must});
1943 wakaba 1.4 }
1944 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
1945 wakaba 1.4 ## ISSUE: Are these examples conforming?
1946     ## <head><title xlink:href=""/><base target="name"/></head>
1947     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
1948     ## (assuming that |xbl:xbl| is allowed before |base|)
1949     ## NOTE: These are non-conformant anyway because of |head|'s content model:
1950     ## <link href=""/><base target="name"/>
1951     ## <link rel=unknown href=""><base target=name>
1952 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1953 wakaba 1.104 type => 'basetarget after hyperlink',
1954     level => $self->{level}->{must});
1955 wakaba 1.4 }
1956    
1957 wakaba 1.14 if (not $has_href and not $has_target) {
1958 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1959 wakaba 1.104 type => 'attribute missing:href|target',
1960     level => $self->{level}->{must});
1961 wakaba 1.14 }
1962    
## Mark the |href| entry of uri_info as type "base".
1963 wakaba 1.66 $element_state->{uri_info}->{href}->{type}->{base} = 1;
1964    
## Build a per-attribute checker on each call and run it immediately.
1965 wakaba 1.4 return $GetHTMLAttrsChecker->({
1966     href => $HTMLURIAttrChecker,
1967     target => $HTMLTargetAttrChecker,
1968 wakaba 1.49 }, {
1969     %HTMLAttrStatus,
1970 wakaba 1.153 href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
1971     id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
1972     target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
1973 wakaba 1.40 })->($self, $item, $element_state);
1974 wakaba 1.4 },
1975 wakaba 1.1 };
1976    
## Definition of the |link| element (empty content model).
##
## |check_attrs| first runs the per-attribute checkers, then applies
## element-level rules: |href| and |rel| are required, |sizes| is only
## allowed together with the |icon| link type, and an alternate style
## sheet needs a non-empty |title|.
1977     $Element->{$HTML_NS}->{link} = {
1978 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
1979 wakaba 1.40 %HTMLEmptyChecker,
1980     check_attrs => sub {
1981     my ($self, $item, $element_state) = @_;
## Set by the |sizes| checker below so the later |icon| test can point at
## the attribute node.
1982 wakaba 1.96 my $sizes_attr;
1983 wakaba 1.1 $GetHTMLAttrsChecker->({
1984 wakaba 1.91 charset => sub {
1985     my ($self, $attr) = @_;
1986     $HTMLCharsetChecker->($attr->value, @_);
1987     },
1988 wakaba 1.1 href => $HTMLURIAttrChecker,
1989 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
1990 wakaba 1.92 rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
1991 wakaba 1.1 media => $HTMLMQAttrChecker,
1992     hreflang => $HTMLLanguageTagAttrChecker,
## |sizes|: space-separated tokens, each either |any| or WIDTHxHEIGHT with
## no leading zeros; duplicate tokens are errors.
1993 wakaba 1.96 sizes => sub {
1994     my ($self, $attr) = @_;
1995     $sizes_attr = $attr;
1996     my %word;
1997     for my $word (grep {length $_}
1998 wakaba 1.132 split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
1999 wakaba 1.96 unless ($word{$word}) {
2000     $word{$word} = 1;
2001     if ($word eq 'any' or $word =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/) {
2002     #
2003     } else {
2004     $self->{onerror}->(node => $attr,
2005 wakaba 1.104 type => 'sizes:syntax error',
2006 wakaba 1.96 value => $word,
2007 wakaba 1.104 level => $self->{level}->{must});
2008 wakaba 1.96 }
2009     } else {
2010     $self->{onerror}->(node => $attr, type => 'duplicate token',
2011     value => $word,
2012 wakaba 1.104 level => $self->{level}->{must});
2013 wakaba 1.96 }
2014     }
2015     },
2016 wakaba 1.70 target => $HTMLTargetAttrChecker,
2017 wakaba 1.1 type => $HTMLIMTAttrChecker,
2018     ## NOTE: Though |title| has special semantics,
2019     ## syntactically same as the |title| as global attribute.
2020 wakaba 1.49 }, {
2021     %HTMLAttrStatus,
2022 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2023 wakaba 1.91 charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
2024     ## NOTE: |charset| attribute had been part of HTML5 spec though
2025     ## it had been commented out.
2026 wakaba 1.154 href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
2027 wakaba 1.82 FEATURE_M12N10_REC,
2028 wakaba 1.153 hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2029     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2030     media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2031 wakaba 1.61 methods => FEATURE_HTML20_RFC,
2032 wakaba 1.154 rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
2033 wakaba 1.153 FEATURE_M12N10_REC,
2034 wakaba 1.154 rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2035 wakaba 1.61 sdapref => FEATURE_HTML20_RFC,
2036 wakaba 1.153 sizes => FEATURE_HTML5_LC,
2037 wakaba 1.82 target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2038 wakaba 1.153 # title: HTML5_WD | HTML5_LC | ...
2039     type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
2040 wakaba 1.61 urn => FEATURE_HTML20_RFC,
2041 wakaba 1.40 })->($self, $item, $element_state);
2042 wakaba 1.96
## NOTE(review): |$item->{has_hyperlink_link_type}| and
## |$element_state->{link_rel}| are only read here; presumably the |rel|
## checker ($HTMLLinkTypesAttrChecker) fills them in -- confirm.
2043 wakaba 1.40 if ($item->{node}->has_attribute_ns (undef, 'href')) {
2044     $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
2045 wakaba 1.4 } else {
2046 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2047 wakaba 1.104 type => 'attribute missing',
2048     text => 'href',
2049     level => $self->{level}->{must});
2050 wakaba 1.1 }
2051 wakaba 1.96
2052 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'rel')) {
2053     $self->{onerror}->(node => $item->{node},
2054 wakaba 1.104 type => 'attribute missing',
2055     text => 'rel',
2056     level => $self->{level}->{must});
2057 wakaba 1.96 }
2058    
## |sizes| without the |icon| link type is not allowed.
2059     if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
2060     $self->{onerror}->(node => $sizes_attr,
2061     type => 'attribute not allowed',
2062 wakaba 1.104 level => $self->{level}->{must});
2063 wakaba 1.1 }
2064 wakaba 1.116
## An alternate style sheet must carry a non-empty |title| attribute.
2065     if ($element_state->{link_rel}->{alternate} and
2066     $element_state->{link_rel}->{stylesheet}) {
2067     my $title_attr = $item->{node}->get_attribute_node_ns (undef, 'title');
2068     unless ($title_attr) {
2069     $self->{onerror}->(node => $item->{node},
2070     type => 'attribute missing',
2071     text => 'title',
2072     level => $self->{level}->{must});
2073     } elsif ($title_attr->value eq '') {
2074     $self->{onerror}->(node => $title_attr,
2075     type => 'empty style sheet title',
2076     level => $self->{level}->{must});
2077     }
2078     }
2079 wakaba 1.1 },
2080     };
2081    
2082     $Element->{$HTML_NS}->{meta} = {
2083 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2084 wakaba 1.40 %HTMLEmptyChecker,
2085     check_attrs => sub {
2086     my ($self, $item, $element_state) = @_;
2087 wakaba 1.1 my $name_attr;
2088     my $http_equiv_attr;
2089     my $charset_attr;
2090     my $content_attr;
2091 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2092 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2093     $attr_ns = '' unless defined $attr_ns;
2094     my $attr_ln = $attr->manakai_local_name;
2095     my $checker;
2096 wakaba 1.73 my $status;
2097 wakaba 1.1 if ($attr_ns eq '') {
2098 wakaba 1.73 $status = {
2099     %HTMLAttrStatus,
2100 wakaba 1.82 %XHTML2CommonAttrStatus,
2101 wakaba 1.153 charset => FEATURE_HTML5_WD,
2102     content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2103     dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2104     'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2105     id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
2106     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2107     name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2108 wakaba 1.73 scheme => FEATURE_M12N10_REC,
2109     }->{$attr_ln};
2110    
2111 wakaba 1.1 if ($attr_ln eq 'content') {
2112     $content_attr = $attr;
2113     $checker = 1;
2114     } elsif ($attr_ln eq 'name') {
2115     $name_attr = $attr;
2116     $checker = 1;
2117     } elsif ($attr_ln eq 'http-equiv') {
2118     $http_equiv_attr = $attr;
2119     $checker = 1;
2120     } elsif ($attr_ln eq 'charset') {
2121     $charset_attr = $attr;
2122     $checker = 1;
2123 wakaba 1.67 } elsif ($attr_ln eq 'scheme') {
2124 wakaba 1.71 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2125 wakaba 1.67 $checker = sub {};
2126 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2127     $attr_ln !~ /[A-Z]/) {
2128 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
2129     $status = $HTMLDatasetAttrStatus;
2130 wakaba 1.1 } else {
2131     $checker = $HTMLAttrChecker->{$attr_ln}
2132 wakaba 1.67 || $AttrChecker->{$attr_ns}->{$attr_ln}
2133 wakaba 1.1 || $AttrChecker->{$attr_ns}->{''};
2134     }
2135     } else {
2136     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2137 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
2138     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2139     || $AttrStatus->{$attr_ns}->{''};
2140     $status = FEATURE_ALLOWED if not defined $status;
2141 wakaba 1.1 }
2142 wakaba 1.62
2143 wakaba 1.1 if ($checker) {
2144 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
2145 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
2146 wakaba 1.54 #
2147 wakaba 1.1 } else {
2148 wakaba 1.104 $self->{onerror}->(node => $attr,
2149     type => 'unknown attribute',
2150     level => $self->{level}->{uncertain});
2151 wakaba 1.49 ## ISSUE: No conformance createria for unknown attributes in the spec
2152     }
2153    
2154 wakaba 1.82 $self->_attr_status_info ($attr, $status);
2155 wakaba 1.1 }
2156    
2157     if (defined $name_attr) {
2158     if (defined $http_equiv_attr) {
2159     $self->{onerror}->(node => $http_equiv_attr,
2160 wakaba 1.104 type => 'attribute not allowed',
2161     level => $self->{level}->{must});
2162 wakaba 1.1 } elsif (defined $charset_attr) {
2163     $self->{onerror}->(node => $charset_attr,
2164 wakaba 1.104 type => 'attribute not allowed',
2165     level => $self->{level}->{must});
2166 wakaba 1.1 }
2167     my $metadata_name = $name_attr->value;
2168     my $metadata_value;
2169     if (defined $content_attr) {
2170     $metadata_value = $content_attr->value;
2171     } else {
2172 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2173 wakaba 1.104 type => 'attribute missing',
2174     text => 'content',
2175     level => $self->{level}->{must});
2176 wakaba 1.1 $metadata_value = '';
2177     }
2178     } elsif (defined $http_equiv_attr) {
2179     if (defined $charset_attr) {
2180     $self->{onerror}->(node => $charset_attr,
2181 wakaba 1.104 type => 'attribute not allowed',
2182     level => $self->{level}->{must});
2183 wakaba 1.1 }
2184     unless (defined $content_attr) {
2185 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2186 wakaba 1.104 type => 'attribute missing',
2187     text => 'content',
2188     level => $self->{level}->{must});
2189 wakaba 1.1 }
2190     } elsif (defined $charset_attr) {
2191     if (defined $content_attr) {
2192     $self->{onerror}->(node => $content_attr,
2193 wakaba 1.104 type => 'attribute not allowed',
2194     level => $self->{level}->{must});
2195 wakaba 1.1 }
2196     } else {
2197     if (defined $content_attr) {
2198     $self->{onerror}->(node => $content_attr,
2199 wakaba 1.104 type => 'attribute not allowed',
2200     level => $self->{level}->{must});
2201 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2202 wakaba 1.104 type => 'attribute missing:name|http-equiv',
2203     level => $self->{level}->{must});
2204 wakaba 1.1 } else {
2205 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2206 wakaba 1.104 type => 'attribute missing:name|http-equiv|charset',
2207     level => $self->{level}->{must});
2208 wakaba 1.1 }
2209     }
2210    
2211 wakaba 1.32 my $check_charset_decl = sub () {
2212 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
2213 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2214     for my $el (@{$parent->child_nodes}) {
2215     next unless $el->node_type == 1; # ELEMENT_NODE
2216 wakaba 1.40 unless ($el eq $item->{node}) {
2217 wakaba 1.29 ## NOTE: Not the first child element.
2218 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2219 wakaba 1.32 type => 'element not allowed:meta charset',
2220 wakaba 1.104 level => $self->{level}->{must});
2221 wakaba 1.29 }
2222     last;
2223     ## NOTE: Entity references are not supported.
2224     }
2225     } else {
2226 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2227 wakaba 1.32 type => 'element not allowed:meta charset',
2228 wakaba 1.104 level => $self->{level}->{must});
2229 wakaba 1.29 }
2230    
2231 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
2232     $self->{onerror}->(node => $item->{node},
2233 wakaba 1.32 type => 'in XML:charset',
2234 wakaba 1.104 level => $self->{level}->{must});
2235 wakaba 1.1 }
2236 wakaba 1.32 }; # $check_charset_decl
2237 wakaba 1.21
2238 wakaba 1.32 my $check_charset = sub ($$) {
2239     my ($attr, $charset_value) = @_;
2240 wakaba 1.21
2241 wakaba 1.91 my $charset;
2242     ($charset, $charset_value)
2243     = $HTMLCharsetChecker->($charset_value, $self, $attr);
2244    
2245 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
2246 wakaba 1.21 if (defined $ic) {
2247     ## TODO: Test for this case
2248     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2249     if ($charset ne $ic_charset) {
2250 wakaba 1.32 $self->{onerror}->(node => $attr,
2251 wakaba 1.104 type => 'mismatched charset name',
2252 wakaba 1.106 text => $ic,
2253 wakaba 1.104 value => $charset_value,
2254     level => $self->{level}->{must});
2255 wakaba 1.21 }
2256     } else {
2257     ## NOTE: MUST, but not checkable, since the document is not originally
2258     ## in serialized form (or the parser does not preserve the input
2259     ## encoding information).
2260 wakaba 1.32 $self->{onerror}->(node => $attr,
2261 wakaba 1.104 type => 'mismatched charset name not checked',
2262     value => $charset_value,
2263     level => $self->{level}->{uncertain});
2264 wakaba 1.21 }
2265    
2266 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
2267     $self->{onerror}->(node => $attr,
2268 wakaba 1.104 type => 'charref in charset',
2269     level => $self->{level}->{must},
2270     layer => 'syntax');
2271 wakaba 1.22 }
2272 wakaba 1.32 }; # $check_charset
2273    
2274     ## TODO: metadata conformance
2275    
2276     ## TODO: pragma conformance
2277     if (defined $http_equiv_attr) { ## An enumerated attribute
2278     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
2279 wakaba 1.33
2280 wakaba 1.85 if ($self->{has_http_equiv}->{$keyword}) {
2281     $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2282     node => $http_equiv_attr,
2283 wakaba 1.104 level => $self->{level}->{must});
2284 wakaba 1.85 } else {
2285     $self->{has_http_equiv}->{$keyword} = 1;
2286     }
2287    
2288     if ($keyword eq 'content-type') {
2289 wakaba 1.58 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2290 wakaba 1.33
2291 wakaba 1.32 $check_charset_decl->();
2292     if ($content_attr) {
2293     my $content = $content_attr->value;
2294 wakaba 1.58 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2295 wakaba 1.132 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2296 wakaba 1.58 =(.+)\z!sx) {
2297 wakaba 1.32 $check_charset->($content_attr, $1);
2298     } else {
2299     $self->{onerror}->(node => $content_attr,
2300     type => 'meta content-type syntax error',
2301 wakaba 1.104 level => $self->{level}->{must});
2302 wakaba 1.85 }
2303     }
2304     } elsif ($keyword eq 'default-style') {
2305     ## ISSUE: Not defined yet in the spec.
2306     } elsif ($keyword eq 'refresh') {
2307     if ($content_attr) {
2308     my $content = $content_attr->value;
2309     if ($content =~ /\A[0-9]+\z/) {
2310     ## NOTE: Valid non-negative integer.
2311     #
2312 wakaba 1.132 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2313 wakaba 1.85 ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
2314     Whatpm::URIChecker->check_iri_reference ($content, sub {
2315 wakaba 1.104 $self->{onerror}->(value => $content, @_, node => $content_attr);
2316 wakaba 1.106 }, $self->{level});
2317 wakaba 1.85 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URIs".
2318    
2319     $element_state->{uri_info}->{content}->{node} = $content_attr;
2320     $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2321     ## TODO: absolute
2322     push @{$self->{return}->{uri}->{$content} ||= []},
2323     $element_state->{uri_info}->{content};
2324     } else {
2325     $self->{onerror}->(node => $content_attr,
2326     type => 'refresh:syntax error',
2327 wakaba 1.104 level => $self->{level}->{must});
2328 wakaba 1.32 }
2329     }
2330     } else {
2331     $self->{onerror}->(node => $http_equiv_attr,
2332 wakaba 1.104 type => 'enumerated:invalid',
2333     level => $self->{level}->{must});
2334 wakaba 1.32 }
2335     }
2336    
2337     if (defined $charset_attr) {
2338     $check_charset_decl->();
2339     $check_charset->($charset_attr, $charset_attr->value);
2340 wakaba 1.1 }
2341     },
2342     };
2343    
## Checker definition for the HTML |style| element.
##
## The element's text is accumulated while children are visited and handed
## to the |onsubdoc| callback at the end, tagged with the media type derived
## from the |type| attribute (|text/css| when the attribute is absent).
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    dir => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    disabled => FEATURE_XHTML2_ED,
    href => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
    id => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_FD,
    title => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),

  ## Normalizes the |type| attribute into |$element_state->{style_type}|
  ## (lowercased "type/subtype", or undef when it cannot be parsed) and
  ## decides whether child elements are tolerated.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $media_type = $item->{node}->get_attribute_ns (undef, 'type');
    $media_type = 'text/css' unless defined $media_type;
    if ($media_type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      ## NOTE: ASCII case-insensitive
      ($media_type = "$1/$2") =~ tr/A-Z/a-z/;
    } else {
      ## NOTE: We don't know how parameters are handled by UAs.  According to
      ## HTML5 specification, <style> with unknown parameters in |type=""|
      ## must be ignored.
      undef $media_type;
    }

    ## Child elements are rejected only for a recognized |text/css| sheet;
    ## an invalid or unknown |type=""| lets them through.
    ## NOTE: There is no definition for "XML-based styling language" in
    ## HTML5, so |*/xml| and |*+xml| types get no special treatment here.
    $element_state->{allow_element}
        = (defined $media_type and $media_type eq 'text/css') ? 0 : 1;
    $element_state->{style_type} = $media_type;
    $element_state->{text} = '';

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for ('template', 'ref');
  },

  ## Reports a child element unless it is explicitly allowed
  ## (|plus_elements|) or the style type tolerates elements.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
             $element_state->{allow_element}) {
      ## Allowed; nothing to report.
    } else {
      $error_type = 'element not allowed';
    }
    if (defined $error_type) {
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  },

  ## Collects the text of every child text node.
  check_child_text => sub {
    my ($child_node, $element_state) = @_[2, 4];
    $element_state->{text} .= $child_node->data;
  },

  ## Hands the collected text to |onsubdoc|, except for XML-based types,
  ## which are reported instead, and unparsable types, which are skipped.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    my $media_type = $element_state->{style_type};
    if (defined $media_type) {
      if ($media_type =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.  However, we don't know of any
        ## XML-based styling language that can be used in HTML <style>
        ## element, such that we throw a "style language not supported"
        ## error.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML style lang',
                           text => $media_type,
                           level => $self->{level}->{uncertain});
      } else {
        ## NOTE: Should we raise some kind of error for,
        ## say, <style type="text/plaion">?
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $media_type,
                             is_char_string => 1});
      }
    }
    ## else: invalid type="" -- nothing to validate.

    $HTMLChecker{check_end}->(@_);
  },
};
2449 wakaba 1.25 ## ISSUE: Relationship to significant content check?
2450 wakaba 1.1
## Checker definition for the HTML |body| element.  Keys listed after the
## |%HTMLFlowContentChecker| splat replace same-named keys taken from it.
2451     $Element->{$HTML_NS}->{body} = {
2452 wakaba 1.72 %HTMLFlowContentChecker,
2453 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
## First argument: checker per attribute name; second argument: feature
## status flags per attribute name.
2454 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2455     alink => $HTMLColorAttrChecker,
2456     background => $HTMLURIAttrChecker,
2457     bgcolor => $HTMLColorAttrChecker,
2458     link => $HTMLColorAttrChecker,
2459     text => $HTMLColorAttrChecker,
2460     vlink => $HTMLColorAttrChecker,
2461     }, {
2462 wakaba 1.49 %HTMLAttrStatus,
2463 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2464 wakaba 1.49 alink => FEATURE_M12N10_REC_DEPRECATED,
2465     background => FEATURE_M12N10_REC_DEPRECATED,
2466     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
2467 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2468 wakaba 1.49 link => FEATURE_M12N10_REC_DEPRECATED,
2469 wakaba 1.50 onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2470     onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2471 wakaba 1.49 text => FEATURE_M12N10_REC_DEPRECATED,
2472     vlink => FEATURE_M12N10_REC_DEPRECATED,
2473     }),
## Marks the URI type recorded for each URI-bearing attribute:
## |background| as embedded, |template| and |ref| as resource.
2474 wakaba 1.68 check_start => sub {
2475     my ($self, $item, $element_state) = @_;
2476    
2477     $element_state->{uri_info}->{background}->{type}->{embedded} = 1;
2478 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2479     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2480 wakaba 1.68 },
2481 wakaba 1.1 };
2482    
## Checker definition for the HTML |section| element.  It takes everything
## from |%HTMLFlowContentChecker| and then sets its own |status| and
## |check_attrs|; no element-specific attribute checkers are registered
## (the first hash passed to |$GetHTMLAttrsChecker| is empty).
2483     $Element->{$HTML_NS}->{section} = {
2484 wakaba 1.72 %HTMLFlowContentChecker,
2485 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
2486 wakaba 1.82 check_attrs => $GetHTMLAttrsChecker->({
2487     }, {
2488     %HTMLAttrStatus,
2489     %XHTML2CommonAttrStatus,
2490     }),
2491 wakaba 1.1 };
2492    
## Checker definition for the HTML |nav| element.
## NOTE(review): |%HTMLFlowContentChecker| is splatted after |status|, so a
## |status| key in that hash (if it has one) would replace the value given
## here -- confirm against the definition of |%HTMLFlowContentChecker|.
2493     $Element->{$HTML_NS}->{nav} = {
2494 wakaba 1.153 status => FEATURE_HTML5_LC,
2495 wakaba 1.72 %HTMLFlowContentChecker,
2496 wakaba 1.1 };
2497    
## Checker definition for the HTML |article| element.
## NOTE(review): |%HTMLFlowContentChecker| is splatted after |status|, so a
## |status| key in that hash (if it has one) would replace the value given
## here -- confirm against the definition of |%HTMLFlowContentChecker|.
2498     $Element->{$HTML_NS}->{article} = {
2499 wakaba 1.153 status => FEATURE_HTML5_LC,
2500 wakaba 1.72 %HTMLFlowContentChecker,
2501 wakaba 1.1 };
2502    
## Checker definition for the HTML |blockquote| element.  The |cite|
## attribute is checked with |$HTMLURIAttrChecker|.
2503     $Element->{$HTML_NS}->{blockquote} = {
2504 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2505 wakaba 1.72 %HTMLFlowContentChecker,
## First argument: checker per attribute name; second argument: feature
## status flags per attribute name.
2506 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2507 wakaba 1.1 cite => $HTMLURIAttrChecker,
2508 wakaba 1.49 }, {
2509     %HTMLAttrStatus,
2510 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2511 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2512 wakaba 1.154 cite => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2513 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2514 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2515 wakaba 1.1 }),
## Marks the URI type recorded for each URI-bearing attribute: |cite| as
## cite, |template| and |ref| as resource.
2516 wakaba 1.66 check_start => sub {
2517     my ($self, $item, $element_state) = @_;
2518    
2519     $element_state->{uri_info}->{cite}->{type}->{cite} = 1;
2520 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2521     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2522 wakaba 1.66 },
2523 wakaba 1.1 };
2524    
## Checker definition for the HTML |aside| element.
## NOTE(review): |%HTMLFlowContentChecker| is splatted after |status|, so a
## |status| key in that hash (if it has one) would replace the value given
## here -- confirm against the definition of |%HTMLFlowContentChecker|.
2525     $Element->{$HTML_NS}->{aside} = {
2526 wakaba 1.153 status => FEATURE_HTML5_LC,
2527 wakaba 1.72 %HTMLFlowContentChecker,
2528 wakaba 1.1 };
2529    
## Checker definition for the HTML |h1| element.  The definitions of |h2|
## to |h6| are created as copies of this hash.
2530     $Element->{$HTML_NS}->{h1} = {
2531 wakaba 1.40 %HTMLPhrasingContentChecker,
2532 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
## |align| is an enumerated attribute accepting left, center, right and
## justify.
2533 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2534     align => $GetHTMLEnumeratedAttrChecker->({
2535     left => 1, center => 1, right => 1, justify => 1,
2536     }),
2537     }, {
2538 wakaba 1.49 %HTMLAttrStatus,
2539 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2540 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
2541 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2542 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2543 wakaba 1.49 }),
## Sets the checker-wide |has_hn| flag, which the |header| checker reads in
## its |check_end| to detect a missing heading.
2544 wakaba 1.40 check_start => sub {
2545     my ($self, $item, $element_state) = @_;
2546     $self->{flag}->{has_hn} = 1;
2547 wakaba 1.79
2548     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2549     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2550 wakaba 1.1 },
2551     };
2552    
## |h2| .. |h6|: each gets its own shallow copy of the |h1| definition, so
## replacing a key on one of them later does not affect the others.
for my $heading_ln ('h2', 'h3', 'h4', 'h5', 'h6') {
  $Element->{$HTML_NS}->{$heading_ln} = {%{$Element->{$HTML_NS}->{h1}}};
}
2562 wakaba 1.1
2563 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
2564    
## Checker definition for the HTML |header| element.
##
## While the element is open, |header|, |footer| and the elements in
## |$HTMLSectioningContent| are registered through |_add_minus_elements|.
## The element must contain a heading: the checker-wide |has_hn| flag is
## saved and cleared on entry, inspected on exit, and then restored so that
## an enclosing |header| still sees headings found before this one.
$Element->{$HTML_NS}->{header} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my @excluded = ({$HTML_NS => {header => '1', footer => '1'}},
                    $HTMLSectioningContent);
    $self->_add_minus_elements ($element_state, @excluded);

    ## Remember the outer state, then start counting headings afresh.
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for ('template', 'ref');
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    if (not $self->{flag}->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn',
                         level => $self->{level}->{must});
    }

    ## Restore the outer state unless a heading was found inside.
    $self->{flag}->{has_hn} = $element_state->{has_hn_original}
        unless $self->{flag}->{has_hn};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
2593    
## Checker definition for the HTML |footer| element.
##
## While the element is open, |footer| itself and the elements in
## |$HTMLSectioningContent| and |$HTMLHeadingContent| are registered
## through |_add_minus_elements|; the registration is undone on exit.
$Element->{$HTML_NS}->{footer} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my @excluded = ({$HTML_NS => {footer => 1}},
                    $HTMLSectioningContent,
                    $HTMLHeadingContent);
    $self->_add_minus_elements ($element_state, @excluded);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for ('template', 'ref');
  },
  check_end => sub {
    my ($self, undef, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## Delegate the rest to the generic flow content check.
    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
2614    
## Checker definition for the HTML |address| element.  Keys listed after
## the |%HTMLFlowContentChecker| splat replace same-named keys taken from
## it.
2615     $Element->{$HTML_NS}->{address} = {
2616 wakaba 1.72 %HTMLFlowContentChecker,
2617 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
## No element-specific attribute checker is active; the |align| checker
## below is commented out.
2618 wakaba 1.110 check_attrs => $GetHTMLAttrsChecker->({
2619     ## TODO: add test
2620     #align => $GetHTMLEnumeratedAttrChecker->({
2621     # left => 1, center => 1, right => 1, justify => 1,
2622     #}),
2623     }, {
2624 wakaba 1.49 %HTMLAttrStatus,
2625 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2626 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2627 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2628 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2629     sdapref => FEATURE_HTML20_RFC,
2630 wakaba 1.49 }),
## Registers |footer|, |address| and the elements in
## |$HTMLSectioningContent| and |$HTMLHeadingContent| through
## |_add_minus_elements| for the duration of this element.
2631 wakaba 1.40 check_start => sub {
2632     my ($self, $item, $element_state) = @_;
2633     $self->_add_minus_elements ($element_state,
2634     {$HTML_NS => {footer => 1, address => 1}},
2635     $HTMLSectioningContent, $HTMLHeadingContent);
2636 wakaba 1.79
2637     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2638     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2639 wakaba 1.40 },
## Undoes the registration made in |check_start|, then runs the generic
## flow content end check.
2640     check_end => sub {
2641     my ($self, $item, $element_state) = @_;
2642     $self->_remove_minus_elements ($element_state);
2643 wakaba 1.29
2644 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
2645 wakaba 1.29 },
2646 wakaba 1.1 };
2647    
## Checker definition for the HTML |p| element.  Keys listed after the
## |%HTMLPhrasingContentChecker| splat replace same-named keys taken from
## it.
2648     $Element->{$HTML_NS}->{p} = {
2649 wakaba 1.40 %HTMLPhrasingContentChecker,
2650 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
## |align| is an enumerated attribute accepting left, center, right and
## justify.
2651 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2652     align => $GetHTMLEnumeratedAttrChecker->({
2653     left => 1, center => 1, right => 1, justify => 1,
2654     }),
2655     }, {
2656 wakaba 1.49 %HTMLAttrStatus,
2657 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2658 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
2659 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2660 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2661 wakaba 1.49 }),
2662 wakaba 1.1 };
2663    
## Checker definition for the HTML |hr| element, built on
## |%HTMLEmptyChecker|.  The HTML4 presentational attributes have status
## entries in the second hash but no value checkers yet (see the TODO).
2664     $Element->{$HTML_NS}->{hr} = {
2665 wakaba 1.40 %HTMLEmptyChecker,
2666 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
2667 wakaba 1.70 check_attrs => $GetHTMLAttrsChecker->({
2668     ## TODO: HTML4 |align|, |noshade|, |size|, |width|
2669     }, {
2670 wakaba 1.49 %HTMLAttrStatus,
2671     %HTMLM12NCommonAttrStatus,
2672     align => FEATURE_M12N10_REC_DEPRECATED,
2673 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2674 wakaba 1.49 noshade => FEATURE_M12N10_REC_DEPRECATED,
2675 wakaba 1.61 sdapref => FEATURE_HTML20_RFC,
2676 wakaba 1.49 size => FEATURE_M12N10_REC_DEPRECATED,
2677     width => FEATURE_M12N10_REC_DEPRECATED,
2678     }),
2679 wakaba 1.1 };
2680    
## Checker definition for the HTML |br| element, built on
## |%HTMLEmptyChecker|.
2681     $Element->{$HTML_NS}->{br} = {
2682 wakaba 1.40 %HTMLEmptyChecker,
2683 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
## |clear| is an enumerated attribute accepting left, all, right and none.
2684 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2685     clear => $GetHTMLEnumeratedAttrChecker->({
2686     left => 1, all => 1, right => 1, none => 1,
2687     }),
2688     }, {
2689 wakaba 1.49 %HTMLAttrStatus,
2690 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2691 wakaba 1.49 clear => FEATURE_M12N10_REC_DEPRECATED,
2692 wakaba 1.153 id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2693 wakaba 1.61 sdapref => FEATURE_HTML20_RFC,
2694 wakaba 1.153 style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2695     title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2696 wakaba 1.49 }),
2697 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
2698     ## (This requirement is semantic so that we cannot check.)
2699 wakaba 1.1 };
2700    
## Checker definition for the HTML |dialog| element.
##
## Children must strictly alternate |dt|, |dd|, |dt|, |dd|, ...  This is
## tracked in |$element_state->{phase}|, which is either 'before dt' (a
## |dt| is expected next) or 'before dd' (a |dd| is expected next).
2701     $Element->{$HTML_NS}->{dialog} = {
2702 wakaba 1.153 status => FEATURE_HTML5_WD,
2703 wakaba 1.40 %HTMLChecker,
## Starts in the 'before dt' phase.
2704     check_start => sub {
2705     my ($self, $item, $element_state) = @_;
2706     $element_state->{phase} = 'before dt';
2707 wakaba 1.79
2708     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2709     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2710 wakaba 1.40 },
## Validates one child element and advances the phase.
2711     check_child_element => sub {
2712     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2713     $child_is_transparent, $element_state) = @_;
## The ":minus" error is reported only when the child is registered in
## |minus_elements| AND |$IsInHTMLInteractiveContent| is true for it.
2714 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2715     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2716 wakaba 1.40 $self->{onerror}->(node => $child_el,
2717     type => 'element not allowed:minus',
2718 wakaba 1.104 level => $self->{level}->{must});
2719 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2720     #
2721     } elsif ($element_state->{phase} eq 'before dt') {
2722     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
2723     $element_state->{phase} = 'before dd';
2724     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
## A |dd| without its |dt|: report it, then expect a |dt| again.
2725     $self->{onerror}
2726 wakaba 1.104 ->(node => $child_el, type => 'ps element missing',
2727     text => 'dt',
2728     level => $self->{level}->{must});
2729 wakaba 1.40 $element_state->{phase} = 'before dt';
2730     } else {
2731 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2732     level => $self->{level}->{must});
2733 wakaba 1.40 }
2734     } elsif ($element_state->{phase} eq 'before dd') {
2735     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
2736     $element_state->{phase} = 'before dt';
2737     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
## A second |dt| in a row: report the missing |dd|, then expect a |dd|
## for this new |dt|.
2738     $self->{onerror}
2739 wakaba 1.104 ->(node => $child_el, type => 'ps element missing',
2740     text => 'dd',
2741     level => $self->{level}->{must});
2742 wakaba 1.40 $element_state->{phase} = 'before dd';
2743     } else {
2744 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2745     level => $self->{level}->{must});
2746 wakaba 1.1 }
2747 wakaba 1.40 } else {
## Any other phase value is an internal error.
2748     die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
2749     }
2750     },
## Text is reported as an error when the caller flags it as significant.
2751     check_child_text => sub {
2752     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2753     if ($has_significant) {
2754 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
2755     level => $self->{level}->{must});
2756 wakaba 1.1 }
2757 wakaba 1.40 },
## Ending in the 'before dd' phase means the last |dt| has no |dd|.
2758     check_end => sub {
2759     my ($self, $item, $element_state) = @_;
2760     if ($element_state->{phase} eq 'before dd') {
2761     $self->{onerror}->(node => $item->{node},
2762 wakaba 1.104 type => 'child element missing',
2763     text => 'dd',
2764     level => $self->{level}->{must});
2765 wakaba 1.1 }
2766 wakaba 1.40
2767     $HTMLChecker{check_end}->(@_);
2768 wakaba 1.1 },
2769     };
2770    
## Checker definition for the HTML |pre| element.
##
## Besides the generic phrasing content checks, a |pre| whose |class|
## attribute contains the token |idl| or |idl-code| has its text content
## handed to the |onsubdoc| callback as |text/x-webidl|.
$Element->{$HTML_NS}->{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## TODO: Flag to enable/disable IDL checking?
    ## |class| is a set of space-separated tokens, so it is split on the
    ## HTML space characters and the tokens are compared exactly.  A
    ## |\b|-anchored regexp is not enough: it treats "-" as a word
    ## boundary, so class="non-idl" or class="idl-foo" would be taken for
    ## IDL blocks.  A missing |class| attribute (undef) is simply not IDL.
    my $class = $item->{node}->get_attribute_ns (undef, 'class');
    my $is_idl = 0;
    if (defined $class) {
      for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $class) {
        if ($token eq 'idl' or $token eq 'idl-code') {
          $is_idl = 1;
          last;
        }
      }
    }

    if ($is_idl) {
      ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
      ## NOTE: pre.code > code.idl-code: WebIDL spec
      ## NOTE: pre.idl-code: DOM1 spec
      ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
      ## NOTE: pre.schema: ReSpec-generated specs
      $self->{onsubdoc}->({s => $item->{node}->text_content,
                           container_node => $item->{node},
                           media_type => 'text/x-webidl',
                           is_char_string => 1});
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2803    
## Checker definition for the HTML |ol| element.  The |ul| definition
## starts from a copy of this hash (and |dir| from a copy of |ul|), so the
## child checks below apply to them as well unless replaced there.
2804     $Element->{$HTML_NS}->{ol} = {
2805 wakaba 1.40 %HTMLChecker,
2806 wakaba 1.153 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
## First argument: checker per attribute name; second argument: feature
## status flags per attribute name.
2807 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
2808 wakaba 1.68 compact => $GetHTMLBooleanAttrChecker->('compact'),
2809 wakaba 1.69 reversed => $GetHTMLBooleanAttrChecker->('reversed'),
2810 wakaba 1.1 start => $HTMLIntegerAttrChecker,
2811 wakaba 1.69 ## TODO: HTML4 |type|
2812 wakaba 1.49 }, {
2813     %HTMLAttrStatus,
2814 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2815 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2816 wakaba 1.49 compact => FEATURE_M12N10_REC_DEPRECATED,
2817 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2818     reversed => FEATURE_HTML5_WD,
2819 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2820 wakaba 1.153 #start => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
2821     start => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2822 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2823 wakaba 1.1 }),
## Only HTML |li| children (or elements listed in |plus_elements|) are
## accepted; anything else is reported.
2824 wakaba 1.40 check_child_element => sub {
2825     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2826     $child_is_transparent, $element_state) = @_;
## The ":minus" error is reported only when the child is registered in
## |minus_elements| AND |$IsInHTMLInteractiveContent| is true for it.
2827 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2828     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2829 wakaba 1.40 $self->{onerror}->(node => $child_el,
2830     type => 'element not allowed:minus',
2831 wakaba 1.104 level => $self->{level}->{must});
2832 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2833     #
2834     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
2835     #
2836     } else {
2837 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2838     level => $self->{level}->{must});
2839 wakaba 1.1 }
2840 wakaba 1.40 },
## Text is reported as an error when the caller flags it as significant.
2841     check_child_text => sub {
2842     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2843     if ($has_significant) {
2844 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
2845     level => $self->{level}->{must});
2846 wakaba 1.1 }
2847     },
2848     };
2849    
## Checker definition for the HTML |ul| element.  It starts from a shallow
## copy of the |ol| definition and replaces |status| and |check_attrs|; the
## remaining keys of |ol| (such as its child checks) are kept.
2850     $Element->{$HTML_NS}->{ul} = {
2851 wakaba 1.40 %{$Element->{$HTML_NS}->{ol}},
2852 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2853 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2854     compact => $GetHTMLBooleanAttrChecker->('compact'),
2855 wakaba 1.69 ## TODO: HTML4 |type|
2856     ## TODO: sdaform, align
2857 wakaba 1.68 }, {
2858 wakaba 1.49 %HTMLAttrStatus,
2859 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2860 wakaba 1.61 align => FEATURE_HTML2X_RFC,
2861 wakaba 1.49 compact => FEATURE_M12N10_REC_DEPRECATED,
2862 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2863 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2864 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2865     }),
2866 wakaba 1.1 };
2867    
## Checker definition for the HTML |dir| element.  It starts from a shallow
## copy of the |ul| definition and replaces |status| and |check_attrs|.
2868 wakaba 1.64 $Element->{$HTML_NS}->{dir} = {
2869     ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
2870     %{$Element->{$HTML_NS}->{ul}},
2871     status => FEATURE_M12N10_REC_DEPRECATED,
2872 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2873     compact => $GetHTMLBooleanAttrChecker->('compact'),
2874     }, {
2875 wakaba 1.64 %HTMLAttrStatus,
2876     %HTMLM12NCommonAttrStatus,
2877     align => FEATURE_HTML2X_RFC,
2878     compact => FEATURE_M12N10_REC_DEPRECATED,
2879 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2880 wakaba 1.64 sdaform => FEATURE_HTML20_RFC,
2881     sdapref => FEATURE_HTML20_RFC,
2882     }),
2883     };
2884    
## Checker definition for the HTML |li| element.
##
## The content model depends on context: when the checker-wide |in_menu|
## flag is set, children are checked as phrasing content, otherwise as flow
## content.
$Element->{$HTML_NS}->{li} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## TODO: HTML4 |type|

    ## |value| is reported when the parent element is not an HTML |ol|;
    ## in every case the value itself is then checked as an integer.
    value => sub {
      my ($self, $attr) = @_;

      my $in_ol = 0;
      my $list_el = $attr->owner_element->manakai_parent_element;
      if (defined $list_el) {
        my $list_ns = $list_el->namespace_uri;
        my $list_ln = $list_el->manakai_local_name;
        $list_ns = '' if not defined $list_ns;
        $in_ol = 1 if $list_ns eq $HTML_NS and $list_ln eq 'ol';
      }

      if (not $in_ol) {
        ## ISSUE: No "MUST" in the spec.
        $self->{onerror}->(node => $attr,
                           type => 'non-ol li value',
                           level => $self->{level}->{html5_fact});
      }

      $HTMLIntegerAttrChecker->($self, $attr);
    },
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    type => FEATURE_M12N10_REC_DEPRECATED,
    #value => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
    #    FEATURE_M12N10_REC_DEPRECATED,
    value => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
        FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my $self = $_[0];
    ## TODO: In <dir> element, then ...
    my $content_checker = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
    $content_checker->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my $self = $_[0];
    ## TODO: In <dir> element, then ...
    my $content_checker = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLFlowContentChecker;
    $content_checker->{check_child_text}->(@_);
  },
};
2943    
## Checker definition for the HTML |dl| element.
##
## Children must form groups of one or more |dt| followed by one or more
## |dd|.  This is tracked in |$element_state->{phase}|:
##   'before dt' - no |dt| or |dd| accepted yet;
##   'in dts'    - the last accepted child was a |dt|;
##   'in dds'    - the last accepted child was a |dd|.
2944     $Element->{$HTML_NS}->{dl} = {
2945 wakaba 1.40 %HTMLChecker,
2946 wakaba 1.154 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2947 wakaba 1.68 check_attrs => $GetHTMLAttrsChecker->({
2948     compact => $GetHTMLBooleanAttrChecker->('compact'),
2949     }, {
2950 wakaba 1.49 %HTMLAttrStatus,
2951 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
2952 wakaba 1.49 compact => FEATURE_M12N10_REC_DEPRECATED,
2953 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
2954 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
2955     sdapref => FEATURE_HTML20_RFC,
2956 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
2957     }),
## Starts in the 'before dt' phase.
2958 wakaba 1.40 check_start => sub {
2959     my ($self, $item, $element_state) = @_;
2960     $element_state->{phase} = 'before dt';
2961 wakaba 1.79
2962     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
2963     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
2964 wakaba 1.40 },
## Validates one child element and advances the phase.  Children other
## than |dt| and |dd| are reported and leave the phase unchanged.
2965     check_child_element => sub {
2966     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2967     $child_is_transparent, $element_state) = @_;
## The ":minus" error is reported only when the child is registered in
## |minus_elements| AND |$IsInHTMLInteractiveContent| is true for it.
2968 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
2969     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
2970 wakaba 1.40 $self->{onerror}->(node => $child_el,
2971     type => 'element not allowed:minus',
2972 wakaba 1.104 level => $self->{level}->{must});
2973 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2974     #
2975     } elsif ($element_state->{phase} eq 'in dds') {
2976     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
2977     #$element_state->{phase} = 'in dds';
2978     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
2979     $element_state->{phase} = 'in dts';
2980     } else {
2981 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2982     level => $self->{level}->{must});
2983 wakaba 1.40 }
2984     } elsif ($element_state->{phase} eq 'in dts') {
2985     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
2986     #$element_state->{phase} = 'in dts';
2987     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
2988     $element_state->{phase} = 'in dds';
2989     } else {
2990 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
2991     level => $self->{level}->{must});
2992 wakaba 1.40 }
2993     } elsif ($element_state->{phase} eq 'before dt') {
2994     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
2995     $element_state->{phase} = 'in dts';
2996     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
## A leading |dd| without any |dt|: report it, then continue as if a
## group of |dd|s had started.
2997     $self->{onerror}
2998 wakaba 1.104 ->(node => $child_el, type => 'ps element missing',
2999     text => 'dt',
3000     level => $self->{level}->{must});
3001 wakaba 1.40 $element_state->{phase} = 'in dds';
3002     } else {
3003 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
3004     level => $self->{level}->{must});
3005 wakaba 1.1 }
3006 wakaba 1.40 } else {
## Any other phase value is an internal error.
3007     die "check_child_element: Bad |dl| phase: $element_state->{phase}";
3008 wakaba 1.1 }
3009 wakaba 1.40 },
## Text is reported as an error when the caller flags it as significant.
3010     check_child_text => sub {
3011     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3012     if ($has_significant) {
3013 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
3014     level => $self->{level}->{must});
3015 wakaba 1.40 }
3016     },
## Ending in the 'in dts' phase means the last |dt| group has no |dd|.
3017     check_end => sub {
3018     my ($self, $item, $element_state) = @_;
3019     if ($element_state->{phase} eq 'in dts') {
3020     $self->{onerror}->(node => $item->{node},
3021 wakaba 1.104 type => 'child element missing',
3022     text => 'dd',
3023     level => $self->{level}->{must});
3024 wakaba 1.1 }
3025    
3026 wakaba 1.40 $HTMLChecker{check_end}->(@_);
3027 wakaba 1.1 },
3028     };
3029    
## |dt|: uses the shared phrasing content checker; no element-specific
## attribute checkers, only attribute status information.
$Element->{$HTML_NS}{dt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3040    
## |dd|: uses the shared flow content checker; no element-specific
## attribute checkers, only attribute status information.
$Element->{$HTML_NS}{dd} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3051    
## |a|: transparent content model.  The attribute checker is written by
## hand (instead of using |$GetHTMLAttrsChecker|) because several
## attributes are only allowed when |href| is also specified.
$Element->{$HTML_NS}{a} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status of the null-namespace attributes known for |a|.
    my %a_attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_WD,
      rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    );

    ## Attribute nodes that have an |a|-specific checker, by local name.
    my %seen;
    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;
      my ($checker, $status);

      if ($ns eq '') {
        $status = $a_attr_status{$ln};

        ## Built per attribute: |rel| closes over |$item| and |rev| gets
        ## a fresh checker from its factory.
        my %a_attr_checker = (
          accesskey => $HTMLAccesskeyAttrChecker,
          charset => sub {
            my ($self, $attr) = @_;
            $HTMLCharsetChecker->($attr->value, @_);
          },
          ## TODO: HTML4 |coords|
          target => $HTMLTargetAttrChecker,
          href => $HTMLURIAttrChecker,
          ping => $HTMLSpaceURIsAttrChecker,
          rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
          rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
          ## TODO: HTML4 |shape|
          media => $HTMLMQAttrChecker,
          ## TODO: HTML4/XHTML1 |name|
          hreflang => $HTMLLanguageTagAttrChecker,
          type => $HTMLIMTAttrChecker,
        );
        $checker = $a_attr_checker{$ln};

        if ($checker) {
          $seen{$ln} = $attr;
        } elsif ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $ln !~ /[A-Z]/) {
          ## |data-*| attribute (no uppercase ASCII letters in the name).
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$ln};
        }
      }

      ## Fall back to the per-namespace tables (exact name, then '').
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($ns ne '' or $status) {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }
      ## (A null-namespace attribute with neither a checker nor a status
      ## is not reported here.)

      $self->_attr_status_info ($attr, $status);
    }

    ## Remember the outer |in_a_href| flag so |check_end| can restore it.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $seen{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## Without |href| these attributes are not allowed.
      for my $name (qw/target ping rel media hreflang type/) {
        next unless defined $seen{$name};
        $self->{onerror}->(node => $seen{$name},
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },

  ## Interactive content is added to the minus elements for the
  ## descendants of |a|.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },

  ## Undo |check_start| and clear |in_a_href| unless it was already set
  ## when this element was entered.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    delete $self->{flag}->{in_a_href}
        if not $element_state->{in_a_href_original};

    $HTMLTransparentChecker{check_end}->(@_);
  },
};
3175    
## |q|: uses the shared phrasing content checker; |cite| is checked as
## a URI attribute.
$Element->{$HTML_NS}{q} = {
  status => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      cite => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    },
  ),
  ## Register the types of the URI-valued attributes.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}{cite}{type}{cite} = 1;
    $element_state->{uri_info}{template}{type}{resource} = 1;
    $element_state->{uri_info}{ref}{type}{resource} = 1;
  },
};
3197 wakaba 1.75 ## TODO: "Quotation punctuation (such as quotation marks), if any, must be
3198     ## placed inside the <code>q</code> element." Though we cannot test the
3199     ## element against this requirement since it includes a semantic bit,
3200     ## it might be possible to inform of the existence of quotation marks OUTSIDE
3201     ## the |q| element.
3202 wakaba 1.1
## |cite|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{cite} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3213    
## |em|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{em} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3224    
## |strong|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{strong} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3235    
## |small|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3245    
## |big|: uses the shared phrasing content checker; its status carries
## no HTML5 feature flag.
$Element->{$HTML_NS}{big} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3255    
## |mark|: the shared phrasing content checker with an HTML5-only status.
## NOTE: |status| is listed before the shared checker is merged in.
$Element->{$HTML_NS}{mark} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
};
3260    
## |dfn|: uses the shared phrasing content checker, disallows nested
## |dfn| elements, and records the defined term in |$self->{term}|.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    ## Determine the defining term:
    ##   1. the |title| attribute of the |dfn| element, if any;
    ##   2. otherwise, if the element has exactly one element child and
    ##      no text child other than inter-element whitespace, and that
    ##      child is an HTML |abbr| element with a |title| attribute,
    ##      the value of that attribute;
    ##   3. otherwise, the text content of the |dfn| element.
    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      my $abbr_title;
      my $element_count = 0;
      CHILD: for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          ## NOTE: Every element child is counted, not only those seen
          ## after a matching |abbr|.  Otherwise children such as
          ## |<span/><abbr title="x"/>| would wrongly select "x".
          if (++$element_count > 1) {
            undef $abbr_title;
            last CHILD;
          }
          next CHILD unless $child->manakai_local_name eq 'abbr';
          my $nsuri = $child->namespace_uri;
          next CHILD unless defined $nsuri and $nsuri eq $HTML_NS;
          my $attr = $child->get_attribute_node_ns (undef, 'title');
          $abbr_title = $attr->value if $attr;
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next CHILD
              if $child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/; # Inter-element whitespace
          undef $abbr_title;
          last CHILD;
        }
      }
      $term = defined $abbr_title ? $abbr_title : $node->text_content;
    }

    ## Register this |dfn| element under its term.
    push @{$self->{term}->{$term} ||= []}, $node;
    ## ISSUE: The HTML5 definition for the defined term does not work with
    ## |ruby| unless |dfn| has |title|.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Undo the |_add_minus_elements| call made in |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3321    
## |abbr|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      full => FEATURE_XHTML2_ED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  ## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
  ## number (plural vs singular) must match the grammatical number of the
  ## contents of the element."  A machine could check this, but only with
  ## language-specific knowledge and a dictionary, so the requirement is
  ## not checked here.
  ## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
};
3338    
## |acronym|: uses the shared phrasing content checker; its status
## carries no HTML5 feature flag.
$Element->{$HTML_NS}{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
3348    
## |time|: the date and time, or the time, is taken from the |datetime|
## attribute if there is one and from the text content otherwise; it is
## validated when the element ends.
$Element->{$HTML_NS}{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      datetime => sub { 1 }, # the value is validated in |check_end|
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      datetime => FEATURE_HTML5_FD,
    },
  ),
  ## TODO: Update definition
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Choose the string to parse, the whitespace pattern that applies
    ## to it, and the node against which errors are reported.
    my $el = $item->{node};
    my $attr = $el->get_attribute_node_ns (undef, 'datetime');
    my ($input, $reg_sp, $input_node) = $attr
        ? ($attr->value, qr/[\x09\x0A\x0C\x0D\x20]*/, $attr)
        : ($el->text_content, qr/\p{WhiteSpace}*/, $el);
    ## ISSUE (text content case): What is the definition for
    ## "successfully extracts a date or time"?  If the algorithm says the
    ## string is invalid but returns some date or time, is that
    ## "successfully"?

    ## Report a must-level error of the given type.
    my $report = sub {
      $self->{onerror}->(node => $input_node,
                         type => $_[0],
                         level => $self->{level}->{must});
    };

    my $dateortime = qr/
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x;

    ## In list context a successful match returns all ten groups (undef
    ## for the groups that did not take part); a failed match returns
    ## the empty list.
    if (my ($year_or_hour, $month, $day, $dt_hour, $dt_minute, $dt_second,
            $tz_hour, $tz_minute, $t_minute, $t_second)
            = $input =~ $dateortime) {
      my ($hour, $minute, $second);

      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        my $year = $year_or_hour;
        $report->('dateortime:syntax error')
            if length $year != 4 or length $month != 2 or
               length $day != 2 or
               length $dt_hour != 2 or length $dt_minute != 2;

        if ($month < 1 or 12 < $month) {
          $report->('datetime:bad month');
        } else {
          ## Index 0 is unused; February allows 29 here and the leap
          ## year rule is applied separately below.
          my @last_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
          $report->('datetime:bad day')
              if $day < 1 or $day > $last_day[$month];
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and
                 not ($year % 400 == 0 or
                      ($year % 4 == 0 and $year % 100 != 0));
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          $report->('dateortime:syntax error')
              if length $tz_hour != 2 or length $tz_minute != 2;
          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        $report->('dateortime:syntax error')
            if length $year_or_hour != 2 or length $t_minute != 2;

        ($hour, $minute, $second) = ($year_or_hour, $t_minute, $t_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: Integer part of second don't have to have length of two.

        ## A fraction with no integer part at all is a syntax error.
        $report->('dateortime:syntax error')
            if substr ($second, 0, 1) eq '.';

        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
3497    
## |meter|: every numeric attribute is checked as a floating point
## number with an accept-everything range predicate.
## TODO: "The recommended way of giving the value is to include it as
## contents of the element"
## TODO: value inequalities (HTML5 revision 1463)
## TODO: content checking
## TODO: content or value must contain number (rev 2053)
$Element->{$HTML_NS}{meter} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## One checker instance is created per attribute.
      map { ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 })) }
          qw/value min low high max optimum/
    },
    {
      %HTMLAttrStatus,
      map { ($_ => FEATURE_HTML5_DEFAULT) }
          qw/high low max min optimum value/
    },
  ),
};
3521    
## |progress|: |value| is passed the predicate "number >= 0" and |max|
## the predicate "number > 0".
## TODO: recommended to use content
$Element->{$HTML_NS}{progress} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      value => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] >= 0 }),
      max => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      max => FEATURE_HTML5_DEFAULT,
      value => FEATURE_HTML5_DEFAULT,
    },
  ),
};
3534    
## |code|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{code} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3545    
## |var|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{var} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3556    
## |samp|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{samp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3567    
## |kbd|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{kbd} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3578    
## |sub|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{sub} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdapref => FEATURE_HTML2X_RFC,
    },
  ),
};

## |sup| shares the very same definition hash as |sub| (not a copy).
$Element->{$HTML_NS}{sup} = $Element->{$HTML_NS}{sub};
3591 wakaba 1.1
## |span|: uses the shared phrasing content checker; attribute status
## information only, including the reserved HTML4 |data*| attributes.
$Element->{$HTML_NS}{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML2X_RFC,
    },
  ),
};
3605    
## |i|: uses the shared phrasing content checker; attribute status
## information only.
$Element->{$HTML_NS}{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};

## |b| shares the very same definition hash as |i| (not a copy).
$Element->{$HTML_NS}{b} = $Element->{$HTML_NS}{i};
3618    
## |tt|: uses the shared phrasing content checker; its status carries
## no HTML5 feature flag.
$Element->{$HTML_NS}{tt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
3629 wakaba 1.51
## |s|: uses the shared phrasing content checker; the element itself is
## marked as deprecated.
$Element->{$HTML_NS}{s} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};

## |strike| and |u| share the very same definition hash as |s|.
$Element->{$HTML_NS}{strike} = $Element->{$HTML_NS}{s};

$Element->{$HTML_NS}{u} = $Element->{$HTML_NS}{s};
3643    
## |bdo|: uses the shared phrasing content checker and additionally
## requires the |dir| attribute.
$Element->{$HTML_NS}{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Run the generic attribute checks first.
    my $check_attrs = $GetHTMLAttrsChecker->(
      {},
      {
        %HTMLAttrStatus,
        class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
        title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
        lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
        sdapref => FEATURE_HTML2X_RFC,
        sdasuff => FEATURE_HTML2X_RFC,
      },
    );
    $check_attrs->($self, $item, $element_state);

    ## Then require |dir|.
    $self->{onerror}->(node => $item->{node},
                       type => 'attribute missing',
                       text => 'dir',
                       level => $self->{level}->{must})
        unless $item->{node}->has_attribute_ns (undef, 'dir');
  },
  ## ISSUE: The spec does not directly say that |dir| is an enumerated attr.
};
3669    
3670 wakaba 1.99 $Element->{$HTML_NS}->{ruby} = {
3671     %HTMLPhrasingContentChecker,
3672     status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
3673     check_attrs => $GetHTMLAttrsChecker->({}, {
3674     %HTMLAttrStatus,
3675     %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
3676 wakaba 1.153 lang => FEATURE_HTML5_WD,
3677 wakaba 1.99 }),
3678     check_start => sub {
3679     my ($self, $item, $element_state) = @_;
3680    
3681     $element_state->{phase} = 'before-rb';
3682     #$element_state->{has_sig}
3683 wakaba 1.100
3684     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
3685     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
3686 wakaba 1.99 },
3687     ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
3688     check_child_element => sub {
3689     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3690     $child_is_transparent, $element_state) = @_;
3691 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
3692     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
3693 wakaba 1.99 $self->{onerror}->(node => $child_el,
3694     type => 'element not allowed:minus',
3695 wakaba 1.104 level => $self->{level}->{must});
3696 wakaba 1.99 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3697     #
3698     } elsif ($element_state->{phase} eq 'before-rb') {
3699     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3700     $element_state->{phase} = 'in-rb';
3701     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3702     $self->{onerror}->(node => $child_el,
3703 wakaba 1.104 level => $self->{level}->{should},
3704     type => 'no significant content before');
3705 wakaba 1.99 $element_state->{phase} = 'after-rt';
3706     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3707     $self->{onerror}->(node => $child_el,
3708 wakaba 1.104 level => $self->{level}->{should},
3709     type => 'no significant content before');
3710 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3711     } else {
3712     $self->{onerror}->(node => $child_el,
3713 wakaba 1.104 type => 'element not allowed:ruby base',
3714     level => $self->{level}->{must});
3715 wakaba 1.99 $element_state->{phase} = 'in-rb';
3716     }
3717     } elsif ($element_state->{phase} eq 'in-rb') {
3718     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3719     #$element_state->{phase} = 'in-rb';
3720     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3721     unless ($element_state->{has_significant}) {
3722     $self->{onerror}->(node => $child_el,
3723 wakaba 1.104 level => $self->{level}->{should},
3724     type => 'no significant content before');
3725 wakaba 1.99 }
3726     $element_state->{phase} = 'after-rt';
3727     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3728     unless ($element_state->{has_significant}) {
3729     $self->{onerror}->(node => $child_el,
3730 wakaba 1.104 level => $self->{level}->{should},
3731     type => 'no significant content before');
3732 wakaba 1.99 }
3733     $element_state->{phase} = 'after-rp1';
3734     } else {
3735     $self->{onerror}->(node => $child_el,
3736 wakaba 1.104 type => 'element not allowed:ruby base',
3737     level => $self->{level}->{must});
3738 wakaba 1.99 #$element_state->{phase} = 'in-rb';
3739     }
3740     } elsif ($element_state->{phase} eq 'after-rt') {
3741     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3742     if ($element_state->{has_significant}) {
3743     $element_state->{has_sig} = 1;
3744     delete $element_state->{has_significant};
3745     }
3746     $element_state->{phase} = 'in-rb';
3747     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3748     $self->{onerror}->(node => $child_el,
3749 wakaba 1.104 level => $self->{level}->{should},
3750     type => 'no significant content before');
3751 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3752     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3753     $self->{onerror}->(node => $child_el,
3754 wakaba 1.104 level => $self->{level}->{should},
3755     type => 'no significant content before');
3756 wakaba 1.99 #$element_state->{phase} = 'after-rt';
3757     } else {
3758     $self->{onerror}->(node => $child_el,
3759 wakaba 1.104 type => 'element not allowed:ruby base',
3760     level => $self->{level}->{must});
3761 wakaba 1.99 if ($element_state->{has_significant}) {
3762     $element_state->{has_sig} = 1;
3763     delete $element_state->{has_significant};
3764     }
3765     $element_state->{phase} = 'in-rb';
3766     }
3767     } elsif ($element_state->{phase} eq 'after-rp1') {
3768     if ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3769     $element_state->{phase} = 'after-rp-rt';
3770     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3771     $self->{onerror}->(node => $child_el,
3772 wakaba 1.104 type => 'ps element missing',
3773     text => 'rt',
3774     level => $self->{level}->{must});
3775 wakaba 1.99 $element_state->{phase} = 'after-rp2';
3776     } else {
3777     $self->{onerror}->(node => $child_el,
3778 wakaba 1.104 type => 'ps element missing',
3779     text => 'rt',
3780     level => $self->{level}->{must});
3781 wakaba 1.99 $self->{onerror}->(node => $child_el,
3782 wakaba 1.104 type => 'ps element missing',
3783     text => 'rp',
3784     level => $self->{level}->{must});
3785 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3786     $self->{onerror}->(node => $child_el,
3787 wakaba 1.104 type => 'element not allowed:ruby base',
3788     level => $self->{level}->{must});
3789 wakaba 1.99 }
3790     if ($element_state->{has_significant}) {
3791     $element_state->{has_sig} = 1;
3792     delete $element_state->{has_significant};
3793     }
3794     $element_state->{phase} = 'in-rb';
3795     }
3796     } elsif ($element_state->{phase} eq 'after-rp-rt') {
3797     if ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3798     $element_state->{phase} = 'after-rp2';
3799     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3800     $self->{onerror}->(node => $child_el,
3801 wakaba 1.104 type => 'ps element missing',
3802     text => 'rp',
3803     level => $self->{level}->{must});
3804 wakaba 1.99 $self->{onerror}->(node => $child_el,
3805 wakaba 1.104 level => $self->{level}->{should},
3806     type => 'no significant content before');
3807 wakaba 1.99 $element_state->{phase} = 'after-rt';
3808     } else {
3809     $self->{onerror}->(node => $child_el,
3810 wakaba 1.104 type => 'ps element missing',
3811     text => 'rp',
3812     level => $self->{level}->{must});
3813 wakaba 1.99 unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3814     $self->{onerror}->(node => $child_el,
3815 wakaba 1.104 type => 'element not allowed:ruby base',
3816     level => $self->{level}->{must});
3817 wakaba 1.99 }
3818     if ($element_state->{has_significant}) {
3819     $element_state->{has_sig} = 1;
3820     delete $element_state->{has_significant};
3821     }
3822     $element_state->{phase} = 'in-rb';
3823     }
3824     } elsif ($element_state->{phase} eq 'after-rp2') {
3825     if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3826     if ($element_state->{has_significant}) {
3827     $element_state->{has_sig} = 1;
3828     delete $element_state->{has_significant};
3829     }
3830     $element_state->{phase} = 'in-rb';
3831     } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
3832     $self->{onerror}->(node => $child_el,
3833 wakaba 1.104 level => $self->{level}->{should},
3834     type => 'no significant content before');
3835 wakaba 1.99 $element_state->{phase} = 'after-rt';
3836     } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
3837     $self->{onerror}->(node => $child_el,
3838 wakaba 1.104 level => $self->{level}->{should},
3839     type => 'no significant content before');
3840 wakaba 1.99 $element_state->{phase} = 'after-rp1';
3841     } else {
3842     $self->{onerror}->(node => $child_el,
3843 wakaba 1.104 type => 'element not allowed:ruby base',
3844     level => $self->{level}->{must});
3845 wakaba 1.99 if ($element_state->{has_significant}) {
3846     $element_state->{has_sig} = 1;
3847     delete $element_state->{has_significant};
3848     }
3849     $element_state->{phase} = 'in-rb';
3850     }
3851     } else {
3852     die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
3853     }
3854     },
check_child_text => sub {
  ## Advances the |ruby| phase when a text node child is seen.  Text
  ## that is not significant leaves the state untouched.
  my ($self, $item, $child_node, $has_significant, $element_state) = @_;
  return unless $has_significant;

  ## For every known phase, the elements that have to be reported as
  ## missing when significant text shows up in that phase.
  my $missing_by_phase = {
    'before-rb' => [],
    'in-rb' => [],
    'after-rt' => [],
    'after-rp2' => [],
    'after-rp1' => ['rt', 'rp'],
    'after-rp-rt' => ['rp'],
  };
  my $missing = $missing_by_phase->{$element_state->{phase}}
      or die "check_child_text: Bad |ruby| phase: $element_state->{phase}";

  for my $name (@$missing) {
    $self->{onerror}->(node => $child_node,
                       type => 'ps element missing',
                       text => $name,
                       level => $self->{level}->{must});
  }

  ## Significant text always belongs to a ruby base.
  $element_state->{phase} = 'in-rb';
},
check_end => sub {
  ## Invoked at the end of a |ruby| element: reports what is still
  ## missing according to the phase the element ended in, then
  ## propagates the "has significant content" flag to the parent state.
  my ($self, $item, $element_state) = @_;
  $self->_remove_minus_elements ($element_state);

  my $phase = $element_state->{phase};
  if ($phase eq 'before-rb') {
    ## Nothing at all was seen: no content and no |rt|.
    $self->{onerror}->(node => $item->{node},
                       level => $self->{level}->{should},
                       type => 'no significant content');
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($phase eq 'in-rb') {
    ## Ended inside a ruby base, i.e. without a following |rt|.
    unless ($element_state->{has_significant}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content at the end');
    }
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
  } elsif ($phase eq 'after-rt' or $phase eq 'after-rp2') {
    ## Complete: nothing to report.
  } elsif ($phase eq 'after-rp1') {
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rt',
                       level => $self->{level}->{must});
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } elsif ($phase eq 'after-rp-rt') {
    $self->{onerror}->(node => $item->{node},
                       type => 'element missing',
                       text => 'rp',
                       level => $self->{level}->{must});
  } else {
    ## The message names this callback so that a broken state is
    ## attributed to the right place.
    die "check_end: Bad |ruby| phase: $element_state->{phase}";
  }

  ## NOTE: A modified version of |check_end| of %AnyChecker.
  ## |has_sig| is set by |check_child_element| when it clears
  ## |has_significant| on starting a new ruby base.
  if ($element_state->{has_significant} or $element_state->{has_sig}) {
    $item->{real_parent_state}->{has_significant} = 1;
  }
},
3934     };
3935    
## |rt|: reuses the callbacks of %HTMLPhrasingContentChecker and only
## supplies its own status and attribute tables.
$Element->{$HTML_NS}->{rt} = do {
  ## Later entries override earlier ones with the same name.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD,
  );

  +{
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
    check_attrs => $GetHTMLAttrsChecker->({}, \%attr_status),
  };
};
3945    
## |rp|: text-only element whose collected significant text has to be
## exactly one punctuation character.
$Element->{$HTML_NS}->{rp} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_RUBY_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my $state = $_[2];

    ## Buffer filled by |check_child_text| and examined in |check_end|.
    $state->{text} = '';

    for my $attr_name (qw/template ref/) {
      $state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $state) = @_;
    ## NOTE: |<rp> <!---->(</rp>| is allowed.
    $state->{text} .= $text_node->data if $is_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;

    ## \p{Pe} (close punctuation) is expected when the parent state is
    ## in the 'after-rp2' phase, \p{Ps} (open punctuation) otherwise.
    my $parent_state = $item->{parent_state};
    my $wants_close = ($parent_state and
                       $parent_state->{phase} and
                       $parent_state->{phase} eq 'after-rp2');
    my $expected = $wants_close ? qr/\p{Pe}/ : qr/\p{Ps}/;

    ## NOTE: Surrounding white space is not tolerated; a variant
    ## allowing [\x09\x0A\x0C\x0D\x20]* around the character was
    ## disabled in the original code.
    unless ($state->{text} =~ /\A$expected\z/) {
      $self->{onerror}->(node => $item->{node},
                         type => 'rp:syntax error',
                         level => $self->{level}->{must});
    }

    $HTMLTextChecker{check_end}->(@_);
  },
};
3987    
3988 wakaba 1.29 =pod
3989    
3990     ## TODO:
3991    
3992     +
3993     + <p>Partly because of the confusion described above, authors are
3994     + strongly recommended to always mark up all paragraphs with the
3995     + <code>p</code> element, and to not have any <code>ins</code> or
3996     + <code>del</code> elements that cross across any <span
3997     + title="paragraph">implied paragraphs</span>.</p>
3998     +
3999     (An informative note)
4000    
4001     <p><code>ins</code> elements should not cross <span
4002     + title="paragraph">implied paragraph</span> boundaries.</p>
4003     (normative)
4004    
4005     + <p><code>del</code> elements should not cross <span
4006     + title="paragraph">implied paragraph</span> boundaries.</p>
4007     (normative)
4008    
4009     =cut
4010    
## |ins|: transparent content model with |cite| and |datetime|
## attributes.
$Element->{$HTML_NS}->{ins} = do {
  my %attr_checker = (
    cite => $HTMLURIAttrChecker,
    datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
  );
  ## Later entries override earlier ones with the same name.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  );

  +{
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my $state = $_[2];

      $state->{uri_info}->{cite}->{type}->{cite} = 1;
      for my $attr_name (qw/template ref/) {
        $state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
      }
    },
  };
};
4032    
## |del|: same attributes as |ins|, but with its own |check_end| that
## does not hand the significance flag on to the parent.
$Element->{$HTML_NS}->{del} = do {
  my %attr_checker = (
    cite => $HTMLURIAttrChecker,
    datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
  );
  ## Later entries override earlier ones with the same name.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  );

  +{
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
    check_start => sub {
      my $state = $_[2];

      $state->{uri_info}->{cite}->{type}->{cite} = 1;
      for my $attr_name (qw/template ref/) {
        $state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
      }
    },
    check_end => sub {
      my ($self, $item, $state) = @_;

      ## NOTE: Significantness flag does not propagate.
      ## Nothing is reported either when the item is marked transparent.
      unless ($state->{has_significant} or $item->{transparent}) {
        $self->{onerror}->(node => $item->{node},
                           level => $self->{level}->{should},
                           type => 'no significant content');
      }
    },
  };
};
4066    
## |figure|
## NOTE: legend, Flow | Flow, legend?
##
## State used while checking the children:
##   has_legend_at_first - a |legend| was seen before any other content
##   has_legend          - the |legend| element seen after other content
##   has_non_legend      - non-|legend| content was seen since the last
##                         |legend| (or since the start)
$Element->{$HTML_NS}->{figure} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Excluded element: report it and count it as non-|legend| content.
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
      return;
    }

    ## Explicitly included element: nothing to check.
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## Anything but |legend| is checked as flow content.
    unless ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
      return;
    }

    if ($element_state->{has_legend_at_first}) {
      ## There already is a leading |legend|; this one is extra.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    } elsif ($element_state->{has_legend}) {
      ## The previously remembered |legend| is the one reported; the
      ## current one becomes the new candidate.
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
      $element_state->{has_legend} = $child_el;
    } elsif ($element_state->{has_non_legend}) {
      $element_state->{has_legend} = $child_el;
    } else {
      $element_state->{has_legend_at_first} = 1;
    }
    delete $element_state->{has_non_legend};
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A |legend| that came after other content must not be followed
    ## by further content.
    if (not $element_state->{has_legend_at_first} and
        $element_state->{has_legend} and
        $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}->{must});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
4126 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
4127 wakaba 1.1
## Attribute checker for attributes whose value is not examined:
## it unconditionally reports 'unknown attribute' for the attribute
## node at the "uncertain" level.
my $AttrCheckerNotImplemented = sub {
  my $checker = shift;
  my $attr_node = shift;
  $checker->{onerror}->(node => $attr_node,
                        type => 'unknown attribute',
                        level => $checker->{level}->{uncertain});
};
4134    
## |img|: empty element.  The attribute checker is built on each call
## of |check_attrs|; afterwards missing |alt| / |src| attributes are
## reported and the URI attributes are classified.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my %attr_checker = (
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      alt => sub { }, ## NOTE: No syntactical requirement
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      ismap => sub {
        my ($checker, $attr, $parent_item) = @_;
        ## Reported unless the |in_a_href| flag is set.
        unless ($checker->{flag}->{in_a_href}) {
          $checker->{onerror}->(node => $attr,
                                type => 'attribute not allowed:ismap',
                                level => $checker->{level}->{must});
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($checker, $attr, $parent_item);
      },
      longdesc => $HTMLURIAttrChecker,
      ## TODO: HTML4 |name|
      height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    );
    ## Later entries override earlier ones with the same name.
    my %attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      src => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    );
    $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status)
        ->($self, $item, $element_state);

    ## Attributes whose absence is reported, with the error level used.
    ## TODO: ... (the |alt| requirement is incomplete)
    for my $required (['alt', 'should'], ['src', 'must']) {
      my ($attr_name, $level_name) = @$required;
      next if $item->{node}->has_attribute_ns (undef, $attr_name);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $attr_name,
                         level => $self->{level}->{$level_name});
    }

    ## TODO: external resource check

    for my $attr_name (qw/src lowsrc dynsrc/) {
      $element_state->{uri_info}->{$attr_name}->{type}->{embedded} = 1;
    }
    $element_state->{uri_info}->{longdesc}->{type}->{cite} = 1;
  },
};
4203    
## |iframe|: text-only content (callbacks of %HTMLTextChecker).
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    name => $HTMLBrowsingContextNameAttrChecker,
    sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
      'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
    }),
    ## NOTE: The HTML5 attribute is spelled |seamless|; the former
    ## spelling |seemless| in this table did not match any attribute
    ## defined by the spec.
    seamless => $GetHTMLBooleanAttrChecker->('seamless'),
    src => $HTMLURIAttrChecker,
    width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
  }, {
    ## Later entries override earlier ones with the same name.
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sandbox => FEATURE_HTML5_WD,
    scrolling => FEATURE_M12N10_REC,
    seamless => FEATURE_HTML5_WD,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Classify the URI attributes of the element.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
};
4245    
## |embed|: empty element.  Unlike most elements it accepts any local
## attribute with a suitable name, so its attributes are checked by a
## dedicated loop rather than by |$GetHTMLAttrsChecker|.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_src;

    ## Status table for the attributes known for |embed|.  It does not
    ## depend on the attribute being examined, so it is built once per
    ## element rather than once per attribute.
    my $known_attr_status = {
      %HTMLAttrStatus,
      height => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_WD,
      type => FEATURE_HTML5_WD,
      width => FEATURE_HTML5_LC,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;

      my $status = $known_attr_status->{$attr_ln};

      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } elsif ($attr_ln eq 'width' or $attr_ln eq 'height') {
          $checker = $AttrCheckerNotImplemented; ## TODO: because spec does not define them yet.
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          ## |data-*| attribute
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } elsif ($attr_ln !~ /^[Xx][Mm][Ll]/ and
                 $attr_ln !~ /[A-Z]/ and
                 $attr_ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
          $checker = $HTMLAttrChecker->{$attr_ln}
              || sub { }; ## NOTE: Any local attribute is ok.
          $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the per-namespace tables, whose '' entry is used
      ## when there is none for the local name.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($attr_ns eq '' and not $status) {
        ## No checker and no status: nothing is reported here.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    unless ($has_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{info});
      ## NOTE: <embed> without src="" is allowed since revision 1929.
      ## We issue an informational message since <embed> w/o src=""
      ## is likely an authoring error.
    }

    ## TODO: external resource check

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
  },
};
4323    
4324 wakaba 1.49 ## TODO:
4325     ## {applet} FEATURE_M12N10_REC_DEPRECATED
4326     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4327    
## |object|
## NOTE: param*, transparent (Flow)
##
## State used while checking the children:
##   has_non_param - content other than |param| has been seen, so
##                   any later |param| is misplaced
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      archive => $HTMLSpaceURIsAttrChecker,
          ## TODO: Relative to @codebase
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      classid => $HTMLURIAttrChecker,
      codebase => $HTMLURIAttrChecker,
      codetype => $HTMLIMTAttrChecker,
          ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
      data => $HTMLURIAttrChecker,
      declare => $GetHTMLBooleanAttrChecker->('declare'),
          ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
          ## [HTML4] but we don't know how to test this.
      form => $HTMLFormAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name => $HTMLBrowsingContextNameAttrChecker,
          ## NOTE: |name| attribute of the |object| element defines
          ## the name of the browsing context created by the element,
          ## if any, but is also used as the form control name of the
          ## form control provided by the plugin, if any.
      standby => sub {}, ## NOTE: %Text; in HTML4
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
    }, {
      ## Later entries override earlier ones with the same name.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      'content-length' => FEATURE_XHTML2_ED,
      data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      form => FEATURE_HTML5_DEFAULT,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## At least one of |data| and |type| has to be specified.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type',
                           level => $self->{level}->{must});
      }
    }

    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      ## A disallowed element still counts as non-|param| content.
      ## (This used to set |has_non_legend|, a flag of |figure| that
      ## is never read for |object|.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included element: nothing to check.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| is only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
4445 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4446     ## What about |<section><object data><style scoped></style>x</object></section>|?
4447     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4448 wakaba 1.1
## |param|: empty element for which both |name| and |value| are
## required attributes.
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my %attr_checker = (
      name => sub { },
      type => $HTMLIMTAttrChecker,
      value => sub { },
      valuetype => $GetHTMLEnumeratedAttrChecker->({
        data => 1, ref => 1, object => 1,
      }),
    );
    ## Later entries override earlier ones with the same name.
    my %attr_status = (
      %HTMLAttrStatus,
      href => FEATURE_RDFA_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    );
    $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status)->(@_);

    ## Report each required attribute that is absent.
    for my $attr_name (qw/name value/) {
      next if $item->{node}->has_attribute_ns (undef, $attr_name);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $attr_name,
                         level => $self->{level}->{must});
    }
  },
};
4484    
## Checker definition for the HTML |video| element.  It copies every
## entry of |%HTMLTransparentChecker| and overrides the attribute
## checker and the start / child / text / end hooks so that |source|
## children are tracked.
##
## Element state keys used below:
##   allow_source - true while a |source| child is still acceptable
##                  (no |src| attribute and no other content seen yet).
##   has_source   - 1 once a |source| child was seen; -1 when |source|
##                  children are expected but none has been seen.
4485     $Element->{$HTML_NS}->{video} = {
4486 wakaba 1.40 %HTMLTransparentChecker,
4487 wakaba 1.48 status => FEATURE_HTML5_LC,
4488 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4489 wakaba 1.1 src => $HTMLURIAttrChecker,
4490     ## TODO: start, loopstart, loopend, end
4491     ## ISSUE: they MUST be "value time offset"s. Value?
4492 wakaba 1.11 ## ISSUE: playcount has no conformance criteria
4493 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
4494     controls => $GetHTMLBooleanAttrChecker->('controls'),
4495 wakaba 1.59 poster => $HTMLURIAttrChecker,
4496 wakaba 1.92 height => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
4497     width => $AttrCheckerNotImplemented, ## TODO: spec does not define yet
4498 wakaba 1.50 }, {
4499     %HTMLAttrStatus,
4500     autoplay => FEATURE_HTML5_LC,
4501     controls => FEATURE_HTML5_LC,
4502 wakaba 1.153 end => FEATURE_HTML5_AT_RISK,
4503 wakaba 1.50 height => FEATURE_HTML5_LC,
4504 wakaba 1.153 loopend => FEATURE_HTML5_AT_RISK,
4505     loopstart => FEATURE_HTML5_AT_RISK,
4506     playcount => FEATURE_HTML5_AT_RISK,
4507 wakaba 1.50 poster => FEATURE_HTML5_LC,
4508     src => FEATURE_HTML5_LC,
4509 wakaba 1.153 start => FEATURE_HTML5_AT_RISK,
4510 wakaba 1.50 width => FEATURE_HTML5_LC,
4511 wakaba 1.1 }),
4512 wakaba 1.42 check_start => sub {
4513     my ($self, $item, $element_state) = @_;
## |source| children are allowed only when the element itself has no
## |src| attribute.
4514     $element_state->{allow_source}
4515     = not $item->{node}->has_attribute_ns (undef, 'src');
## Unless |has_source| is already true, record -1 ("expected but not
## yet seen") when |source| is allowed, or a false value otherwise.
4516     $element_state->{has_source} ||= $element_state->{allow_source} * -1;
4517     ## NOTE: It might be set true by |check_element|.
4518 wakaba 1.66
4519     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4520     $element_state->{uri_info}->{poster}->{type}->{embedded} = 1;
4521 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4522     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4523 wakaba 1.42 },
4524     check_child_element => sub {
4525     my ($self, $item, $child_el, $child_nsuri, $child_ln,
4526     $child_is_transparent, $element_state) = @_;
## Branch order: excluded ("minus") interactive element, explicitly
## included ("plus") element, |source| child, anything else.
4527 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
4528     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
4529 wakaba 1.42 $self->{onerror}->(node => $child_el,
4530     type => 'element not allowed:minus',
4531 wakaba 1.104 level => $self->{level}->{must});
4532 wakaba 1.42 delete $element_state->{allow_source};
4533     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
4534     #
4535     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
## A |source| child is an error once |allow_source| has been cleared
## (or was never set); it is still counted in |has_source|.
4536 wakaba 1.45 unless ($element_state->{allow_source}) {
4537 wakaba 1.104 $self->{onerror}->(node => $child_el,
4538 wakaba 1.72 type => 'element not allowed:flow',
4539 wakaba 1.104 level => $self->{level}->{must});
4540 wakaba 1.42 }
4541 wakaba 1.45 $element_state->{has_source} = 1;
4542 wakaba 1.1 } else {
## Any other child ends the run of leading |source| elements and is
## handed to the flow content checker.
4543 wakaba 1.42 delete $element_state->{allow_source};
4544 wakaba 1.72 $HTMLFlowContentChecker{check_child_element}->(@_);
4545 wakaba 1.42 }
4546     },
4547     check_child_text => sub {
4548     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
## Significant text also ends the run of leading |source| elements.
4549     if ($has_significant) {
4550     delete $element_state->{allow_source};
4551     }
4552 wakaba 1.72 $HTMLFlowContentChecker{check_child_text}->(@_);
4553 wakaba 1.42 },
4554     check_end => sub {
4555     my ($self, $item, $element_state) = @_;
## -1 means |source| children were expected (no |src| attribute) but
## none was found.
4556     if ($element_state->{has_source} == -1) {
4557     $self->{onerror}->(node => $item->{node},
4558 wakaba 1.104 type => 'child element missing',
4559     text => 'source',
4560     level => $self->{level}->{must});
4561 wakaba 1.1 }
4562 wakaba 1.42
## The remaining end-of-element processing is shared with |object|.
4563     $Element->{$HTML_NS}->{object}->{check_end}->(@_);
4564 wakaba 1.1 },
4565     };
4566    
## Checker definition for the HTML |audio| element.  It copies every
## entry of the |video| definition (including its start / child / text
## / end hooks) and then overrides |status| and |check_attrs|.  The
## attribute tables below have no |poster|, |height| or |width|
## entries.
4567     $Element->{$HTML_NS}->{audio} = {
4568 wakaba 1.40 %{$Element->{$HTML_NS}->{video}},
4569 wakaba 1.48 status => FEATURE_HTML5_LC,
4570 wakaba 1.42 check_attrs => $GetHTMLAttrsChecker->({
4571     src => $HTMLURIAttrChecker,
4572     ## TODO: start, loopstart, loopend, end
4573     ## ISSUE: they MUST be "value time offset"s. Value?
4574     ## ISSUE: playcount has no conformance criteria
4575     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
4576     controls => $GetHTMLBooleanAttrChecker->('controls'),
4577 wakaba 1.50 }, {
4578     %HTMLAttrStatus,
4579     autoplay => FEATURE_HTML5_LC,
4580     controls => FEATURE_HTML5_LC,
4581 wakaba 1.153 end => FEATURE_HTML5_AT_RISK,
4582     loopend => FEATURE_HTML5_AT_RISK,
4583     loopstart => FEATURE_HTML5_AT_RISK,
4584     playcount => FEATURE_HTML5_AT_RISK,
4585 wakaba 1.50 src => FEATURE_HTML5_LC,
4586 wakaba 1.153 start => FEATURE_HTML5_AT_RISK,
4587 wakaba 1.42 }),
4588 wakaba 1.1 };
4589    
## Checker definition for the HTML |source| element.  It copies every
## entry of |%HTMLEmptyChecker| and overrides |status| and
## |check_attrs|.
4590     $Element->{$HTML_NS}->{source} = {
4591 wakaba 1.40 %HTMLEmptyChecker,
4592 wakaba 1.153 status => FEATURE_HTML5_LC,
4593 wakaba 1.40 check_attrs => sub {
## Arguments: the checker object, the item under check (|$item->{node}|
## is the element) and the per-element state hash.
4594     my ($self, $item, $element_state) = @_;
## The generated attribute checker is invoked immediately with this
## sub's own arguments.
4595 wakaba 1.1 $GetHTMLAttrsChecker->({
4596 wakaba 1.90 media => $HTMLMQAttrChecker,
## The callback passed here always returns true.
## NOTE(review): presumably it is the range test applied to the parsed
## number - confirm against |$GetHTMLFloatingPointNumberAttrChecker|.
4597     pixelratio => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), ## ISSUE: Negative or zero pixelratio=""
4598     src => $HTMLURIAttrChecker,
4599 wakaba 1.1 type => $HTMLIMTAttrChecker,
4600 wakaba 1.50 }, {
4601     %HTMLAttrStatus,
4602 wakaba 1.153 media => FEATURE_HTML5_LC,
4603     pixelratio => FEATURE_HTML5_LC,
4604     src => FEATURE_HTML5_LC,
4605     type => FEATURE_HTML5_LC,
4606 wakaba 1.66 })->(@_);
## |src| is required: its absence is reported on the element as
## 'attribute missing' at the |must| level.
4607 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'src')) {
4608     $self->{onerror}->(node => $item->{node},
4609 wakaba 1.104 type => 'attribute missing',
4610     text => 'src',
4611     level => $self->{level}->{must});
4612 wakaba 1.1 }
4613 wakaba 1.66
4614     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
4615 wakaba 1.1 },
4616     };
4617    
## Checker definition for the HTML |canvas| element.  It copies every
## entry of |%HTMLTransparentChecker| and overrides |status| and
## |check_attrs|.  |height| and |width| use the non-negative integer
## checker with a callback that always returns true.
## NOTE(review): the callback is presumably a range test on the parsed
## value - confirm against |$GetHTMLNonNegativeIntegerAttrChecker|.
4618     $Element->{$HTML_NS}->{canvas} = {
4619 wakaba 1.40 %HTMLTransparentChecker,
4620 wakaba 1.89 status => FEATURE_HTML5_COMPLETE,
4621 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
4622 wakaba 1.1 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4623     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
4624 wakaba 1.50 }, {
4625     %HTMLAttrStatus,
4626 wakaba 1.89 height => FEATURE_HTML5_COMPLETE,
4627     width => FEATURE_HTML5_COMPLETE,
4628 wakaba 1.1 }),
4629     };
4630    
## Checker definition for the HTML |map| element.  It copies every
## entry of |%HTMLFlowContentChecker| and overrides |status|,
## |check_attrs|, |check_start| and |check_end|.
4631     $Element->{$HTML_NS}->{map} = {
4632 wakaba 1.72 %HTMLFlowContentChecker,
4633 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4634 wakaba 1.40 check_attrs => sub {
4635     my ($self, $item, $element_state) = @_;
## Set by the |name| checker below to |[value, attribute node]| when
## the element has a |name| attribute; stays undefined otherwise.
4636 wakaba 1.100 my $has_name;
4637 wakaba 1.4 $GetHTMLAttrsChecker->({
4638 wakaba 1.100 name => sub {
4639     my ($self, $attr) = @_;
4640     my $value = $attr->value;
4641     if (length $value) {
4642     ## NOTE: Duplication is not non-conforming.
4643     ## NOTE: Space characters are not non-conforming.
4644     #
4645     } else {
4646     $self->{onerror}->(node => $attr,
4647     type => 'empty attribute value',
4648 wakaba 1.104 level => $self->{level}->{must});
4649 wakaba 1.100 }
## The first |name| attribute node seen for a given value is kept in
## |$self->{map}|; later ones with the same value do not replace it.
## This also runs for an empty value.
4650 wakaba 1.4 $self->{map}->{$value} ||= $attr;
4651 wakaba 1.100 $has_name = [$value, $attr];
4652 wakaba 1.4 },
4653 wakaba 1.49 }, {
4654     %HTMLAttrStatus,
4655 wakaba 1.153 class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4656     dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4657     id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4658     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
4659     #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
4660     name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4661 wakaba 1.50 onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4662     ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4663     onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4664     onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4665     onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4666     onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4667     onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4668     onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4669     onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4670     onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4671 wakaba 1.153 title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4672 wakaba 1.66 })->(@_);
4673 wakaba 1.100
## With a |name| attribute: an |id| attribute, if present, must have
## the same value, otherwise 'id ne name' is reported on the |id|
## attribute node.  Without a |name| attribute: 'attribute missing' is
## reported on the element.
4674 wakaba 1.135 if ($has_name) {
4675 wakaba 1.145 my $id = $item->{node}->get_attribute_ns (undef, 'id');
4676 wakaba 1.135 if (defined $id and $has_name->[0] ne $id) {
4677 wakaba 1.155 $self->{onerror}
4678     ->(node => $item->{node}->get_attribute_node_ns (undef, 'id'),
4679     type => 'id ne name',
4680     level => $self->{level}->{must});
4681 wakaba 1.100 }
4682 wakaba 1.135 } else {
4683 wakaba 1.100 $self->{onerror}->(node => $item->{node},
4684 wakaba 1.104 type => 'attribute missing',
4685     text => 'name',
4686     level => $self->{level}->{must});
4687 wakaba 1.100 }
4688 wakaba 1.4 },
4689 wakaba 1.59 check_start => sub {
4690     my ($self, $item, $element_state) = @_;
## Save the current |in_map| flag so that |check_end| can restore it,
## then install a new array holding the previous entries plus one fresh
## hash for this |map| element.
4691     $element_state->{in_map_original} = $self->{flag}->{in_map};
4692 wakaba 1.137 $self->{flag}->{in_map} = [@{$self->{flag}->{in_map} or []}, {}];
4693     ## NOTE: |{in_map}| is a reference to the array which contains
4694     ## hash references. Hashes are corresponding to the opening
4695     ## |map| elements and each of them contains the key-value
4696     ## pairs corresponding to the absolute URLs for the processed
4697     ## |area| elements in the |map| element corresponding to the
4698     ## hash. The key represents the resource (## TODO: use
4699     ## absolute URL), while the value represents whether there is
4700     ## an |area| element whose |alt| attribute is specified to a
4701     ## non-empty value. If there IS such an |area| element for
4702     ## the resource specified by the key, then the value is set to
4703     ## zero (|0|). Otherwise, if there is no such an |area|
4704     ## element but there is any |area| element with the empty
4705     ## |alt=""| attribute, then the value contains an array
4706     ## reference that contains all of such |area| elements.
4707 wakaba 1.79
4708     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4709     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4710 wakaba 1.59 },
4711     check_end => sub {
4712     my ($self, $item, $element_state) = @_;
4713 wakaba 1.137
## Walk the hash that belongs to this |map| element (the last entry of
## |in_map|).  Entries whose value is false (|0|) are skipped; for the
## others every collected node is reported as 'empty area alt'.
4714     for (keys %{$self->{flag}->{in_map}->[-1]}) {
4715     my $nodes = $self->{flag}->{in_map}->[-1]->{$_};
4716     next unless $nodes;
4717     for (@$nodes) {
4718     $self->{onerror}->(type => 'empty area alt',
4719     node => $_,
4720     level => $self->{level}->{html5_no_may});
4721     }
4722     }
4723    
## Restore the flag value saved by |check_start|.
4724     $self->{flag}->{in_map} = $element_state->{in_map_original};
4725    
4726 wakaba 1.72 $HTMLFlowContentChecker{check_end}->(@_);
4727 wakaba 1.59 },
4728 wakaba 1.1 };
4729    
## Checker definition for the HTML |area| element.  It copies every
## entry of |%HTMLEmptyChecker| and overrides |status|, |check_attrs|
## and |check_start|.
##
## |check_attrs| walks the attributes itself instead of using a
## generated attribute checker, because the |href| / |alt| and
## |shape| / |coords| rules need several attributes at once.
4730     $Element->{$HTML_NS}->{area} = {
4731 wakaba 1.40 %HTMLEmptyChecker,
4732 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4733 wakaba 1.40 check_attrs => sub {
4734     my ($self, $item, $element_state) = @_;
## |%attr| collects the attribute nodes (keyed by local name) of the
## null-namespace attributes that have an entry in the local checker
## table below.  |$coords| receives the list of numbers from a
## syntactically valid |coords| value and stays undefined otherwise.
4735 wakaba 1.1 my %attr;
4736     my $coords;
4737 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
4738 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
4739     $attr_ns = '' unless defined $attr_ns;
4740     my $attr_ln = $attr->manakai_local_name;
4741     my $checker;
4742 wakaba 1.73 my $status;
4743 wakaba 1.1 if ($attr_ns eq '') {
4744 wakaba 1.73 $status = {
4745     %HTMLAttrStatus,
4746     %HTMLM12NCommonAttrStatus,
4747     accesskey => FEATURE_M12N10_REC,
4748 wakaba 1.153 alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4749     coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4750 wakaba 1.154 href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
4751 wakaba 1.153 hreflang => FEATURE_HTML5_WD,
4752     lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
4753 wakaba 1.154 media => FEATURE_HTML5_WD,
4754 wakaba 1.73 nohref => FEATURE_M12N10_REC,
4755     onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4756     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4757 wakaba 1.153 ping => FEATURE_HTML5_WD,
4758 wakaba 1.154 rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
4759 wakaba 1.153 shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
4760 wakaba 1.73 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
4761 wakaba 1.153 target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
4762     type => FEATURE_HTML5_WD,
4763 wakaba 1.73 }->{$attr_ln};
4764    
4765 wakaba 1.1 $checker = {
4766 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
4767 wakaba 1.153 alt => sub {
4768     ## NOTE: Checked later.
4769     },
## NOTE(review): the 1 / -1 values presumably distinguish preferred
## keywords from accepted-but-flagged ones - confirm against
## |$GetHTMLEnumeratedAttrChecker|.
4770 wakaba 1.1 shape => $GetHTMLEnumeratedAttrChecker->({
4771     circ => -1, circle => 1,
4772     default => 1,
4773     poly => 1, polygon => -1,
4774     rect => 1, rectangle => -1,
4775     }),
4776     coords => sub {
4777     my ($self, $attr) = @_;
4778     my $value = $attr->value;
## Accepts a comma-separated list of optionally negative decimal
## integers with no white space; anything else is a syntax error and
## leaves |$coords| undefined.
4779     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
4780     $coords = [split /,/, $value];
4781     } else {
4782     $self->{onerror}->(node => $attr,
4783 wakaba 1.104 type => 'coords:syntax error',
4784     level => $self->{level}->{must});
4785 wakaba 1.1 }
4786     },
4787 wakaba 1.70 nohref => $GetHTMLBooleanAttrChecker->('nohref'),
4788     target => $HTMLTargetAttrChecker,
4789 wakaba 1.1 href => $HTMLURIAttrChecker,
4790     ping => $HTMLSpaceURIsAttrChecker,
4791 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
4792 wakaba 1.1 media => $HTMLMQAttrChecker,
4793     hreflang => $HTMLLanguageTagAttrChecker,
4794     type => $HTMLIMTAttrChecker,
4795     }->{$attr_ln};
## Resolution order for a null-namespace attribute: the local table
## above, then lowercase |data-*| names, then the shared
## |$HTMLAttrChecker| table.
4796     if ($checker) {
4797     $attr{$attr_ln} = $attr;
4798 wakaba 1.122 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
4799     $attr_ln !~ /[A-Z]/) {
4800 wakaba 1.73 $checker = $HTMLDatasetAttrChecker;
4801     $status = $HTMLDatasetAttrStatus;
4802 wakaba 1.1 } else {
4803     $checker = $HTMLAttrChecker->{$attr_ln};
4804     }
4805     }
## Fall back to the per-namespace tables, first by local name and then
## by the empty-string entry of that namespace.
4806     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
4807 wakaba 1.82 || $AttrChecker->{$attr_ns}->{''};
4808     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
4809     || $AttrStatus->{$attr_ns}->{''};
4810     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
4811 wakaba 1.62
## A checker that is not a reference is treated as "known, nothing to
## run".  A null-namespace attribute with neither checker nor status is
## silently skipped here; any other attribute without a checker is
## reported as 'unknown attribute'.
4812 wakaba 1.1 if ($checker) {
4813 wakaba 1.66 $checker->($self, $attr, $item, $element_state) if ref $checker;
4814 wakaba 1.62 } elsif ($attr_ns eq '' and not $status) {
4815 wakaba 1.54 #
4816 wakaba 1.1 } else {
4817 wakaba 1.104 $self->{onerror}->(node => $attr,
4818     type => 'unknown attribute',
4819     level => $self->{level}->{uncertain});
4820 wakaba 1.1 ## ISSUE: No conformance criteria for unknown attributes in the spec
4821     }
4822 wakaba 1.49
4823 wakaba 1.82 $self->_attr_status_info ($attr, $status);
4824 wakaba 1.1 }
4825    
## With |href|: |alt| is required, and the |alt| state for this URL is
## recorded in every hash of |$self->{flag}->{in_map}|.  Without
## |href|: the hyperlink-related attributes and |alt| are not allowed.
4826     if (defined $attr{href}) {
4827 wakaba 1.4 $self->{has_hyperlink_element} = 1;
4828 wakaba 1.137 if (defined $attr{alt}) {
4829     my $url = $attr{href}->value; ## TODO: resolve
4830     if (length $attr{alt}->value) {
## A non-empty |alt| settles the URL: the entry becomes |0|.
4831     for (@{$self->{flag}->{in_map} or []}) {
4832     $_->{$url} = 0;
4833     }
4834     } else {
4835     ## NOTE: Empty |alt=""|. If there is another |area| element
4836     ## with the same |href=""| and that |area| element's
4837     ## |alt=""| attribute is not an empty string, then this
4838     ## is conforming.
## The |alt| node is queued unless the URL is already settled (entry
## exists and is false).
4839     for (@{$self->{flag}->{in_map} or []}) {
4840     push @{$_->{$url} ||= []}, $attr{alt}
4841     unless exists $_->{$url} and not $_->{$url};
4842     }
4843     }
4844     } else {
4845 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4846 wakaba 1.104 type => 'attribute missing',
4847     text => 'alt',
4848     level => $self->{level}->{must});
4849 wakaba 1.1 }
4850     } else {
4851     for (qw/target ping rel media hreflang type alt/) {
4852     if (defined $attr{$_}) {
4853     $self->{onerror}->(node => $attr{$_},
4854 wakaba 1.104 type => 'attribute not allowed',
4855     level => $self->{level}->{must});
4856 wakaba 1.1 }
4857     }
4858     }
4859    
## Normalize the |shape| keyword.  A missing attribute or an
## unrecognized value is treated as 'rectangle'.
4860     my $shape = 'rectangle';
4861     if (defined $attr{shape}) {
4862     $shape = {
4863     circ => 'circle', circle => 'circle',
4864     default => 'default',
4865     poly => 'polygon', polygon => 'polygon',
4866     rect => 'rectangle', rectangle => 'rectangle',
4867     }->{lc $attr{shape}->value} || 'rectangle';
4868     ## TODO: ASCII lowercase?
4869     }
4870    
## Per-shape |coords| rules:
##   circle    - required; exactly 3 numbers; the third must not be
##               negative.
##   default   - |coords| is not allowed.
##   polygon   - required; at least 6 numbers and an even count.
##   rectangle - required; exactly 4 numbers with [0] < [2] and
##               [1] < [3].
## When |$coords| is undefined although the attribute exists, the
## syntax error has already been reported by the |coords| checker.
4871     if ($shape eq 'circle') {
4872     if (defined $attr{coords}) {
4873     if (defined $coords) {
4874     if (@$coords == 3) {
4875     if ($coords->[2] < 0) {
4876     $self->{onerror}->(node => $attr{coords},
4877 wakaba 1.104 type => 'coords:out of range',
4878     index => 2,
4879     value => $coords->[2],
4880     level => $self->{level}->{must});
4881 wakaba 1.1 }
4882     } else {
4883     $self->{onerror}->(node => $attr{coords},
4884 wakaba 1.104 type => 'coords:number not 3',
4885     text => 0+@$coords,
4886     level => $self->{level}->{must});
4887 wakaba 1.1 }
4888     } else {
4889     ## NOTE: A syntax error has been reported.
4890     }
4891     } else {
4892 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4893 wakaba 1.104 type => 'attribute missing',
4894     text => 'coords',
4895     level => $self->{level}->{must});
4896 wakaba 1.1 }
4897     } elsif ($shape eq 'default') {
4898     if (defined $attr{coords}) {
4899     $self->{onerror}->(node => $attr{coords},
4900 wakaba 1.104 type => 'attribute not allowed',
4901     level => $self->{level}->{must});
4902 wakaba 1.1 }
4903     } elsif ($shape eq 'polygon') {
4904     if (defined $attr{coords}) {
4905     if (defined $coords) {
4906     if (@$coords >= 6) {
4907     unless (@$coords % 2 == 0) {
4908     $self->{onerror}->(node => $attr{coords},
4909 wakaba 1.104 type => 'coords:number not even',
4910     text => 0+@$coords,
4911     level => $self->{level}->{must});
4912 wakaba 1.1 }
4913     } else {
4914     $self->{onerror}->(node => $attr{coords},
4915 wakaba 1.104 type => 'coords:number lt 6',
4916     text => 0+@$coords,
4917     level => $self->{level}->{must});
4918 wakaba 1.1 }
4919     } else {
4920     ## NOTE: A syntax error has been reported.
4921     }
4922     } else {
4923 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4924 wakaba 1.104 type => 'attribute missing',
4925     text => 'coords',
4926     level => $self->{level}->{must});
4927 wakaba 1.1 }
4928     } elsif ($shape eq 'rectangle') {
4929     if (defined $attr{coords}) {
4930     if (defined $coords) {
4931     if (@$coords == 4) {
4932     unless ($coords->[0] < $coords->[2]) {
4933     $self->{onerror}->(node => $attr{coords},
4934 wakaba 1.104 type => 'coords:out of range',
4935     index => 0,
4936     value => $coords->[0],
4937     level => $self->{level}->{must});
4938 wakaba 1.1 }
4939     unless ($coords->[1] < $coords->[3]) {
4940     $self->{onerror}->(node => $attr{coords},
4941 wakaba 1.104 type => 'coords:out of range',
4942     index => 1,
4943     value => $coords->[1],
4944     level => $self->{level}->{must});
4945 wakaba 1.1 }
4946     } else {
4947     $self->{onerror}->(node => $attr{coords},
4948 wakaba 1.104 type => 'coords:number not 4',
4949     text => 0+@$coords,
4950     level => $self->{level}->{must});
4951 wakaba 1.1 }
4952     } else {
4953     ## NOTE: A syntax error has been reported.
4954     }
4955     } else {
4956 wakaba 1.40 $self->{onerror}->(node => $item->{node},
4957 wakaba 1.104 type => 'attribute missing',
4958     text => 'coords',
4959     level => $self->{level}->{must});
4960 wakaba 1.1 }
4961     }
4962 wakaba 1.66
4963     $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
4964 wakaba 1.1 },
4965 wakaba 1.59 check_start => sub {
4966     my ($self, $item, $element_state) = @_;
## An |area| element is reported as not allowed when the |in_map| flag
## is not set and the element has a parent element; an element without
## a parent element is not reported.
4967     unless ($self->{flag}->{in_map} or
4968     not $item->{node}->manakai_parent_element) {
4969     $self->{onerror}->(node => $item->{node},
4970     type => 'element not allowed:area',
4971 wakaba 1.104 level => $self->{level}->{must});
4972 wakaba 1.59 }
4973 wakaba 1.79
4974     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
4975     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
4976 wakaba 1.59 },
4977 wakaba 1.1 };
4978    
## Checker definition for the HTML |table| element.  It copies every
## entry of |%HTMLChecker| and overrides the attribute checker and the
## start / child / text / end hooks.
##
## Child elements are validated with a small state machine kept in
## |$element_state->{phase}|.  The phases, in document order, are:
##   'before caption' -> 'in colgroup' -> 'after thead'
##     -> 'in tbodys' or 'in trs' -> 'after tfoot'
## |$element_state->{has_tfoot}| records that a |tfoot| child has been
## seen.
4979     $Element->{$HTML_NS}->{table} = {
4980 wakaba 1.40 %HTMLChecker,
4981 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
4982 wakaba 1.69 check_attrs => $GetHTMLAttrsChecker->({
4983 wakaba 1.86 cellpadding => $HTMLLengthAttrChecker,
4984     cellspacing => $HTMLLengthAttrChecker,
4985 wakaba 1.69 frame => $GetHTMLEnumeratedAttrChecker->({
4986     void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
4987     lhs => 1, rhs => 1, box => 1, border => 1,
4988     }),
4989     rules => $GetHTMLEnumeratedAttrChecker->({
4990     none => 1, groups => 1, rows => 1, cols => 1, all => 1,
4991     }),
4992     summary => sub {}, ## NOTE: %Text; in HTML4.
4993     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
4994     }, {
4995 wakaba 1.49 %HTMLAttrStatus,
4996 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
4997 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
4998     bgcolor => FEATURE_M12N10_REC_DEPRECATED,
4999     border => FEATURE_M12N10_REC,
5000     cellpadding => FEATURE_M12N10_REC,
5001     cellspacing => FEATURE_M12N10_REC,
5002 wakaba 1.61 cols => FEATURE_RFC1942,
5003 wakaba 1.49 datafld => FEATURE_HTML4_REC_RESERVED,
5004     dataformatas => FEATURE_HTML4_REC_RESERVED,
5005     datapagesize => FEATURE_M12N10_REC,
5006     datasrc => FEATURE_HTML4_REC_RESERVED,
5007     frame => FEATURE_M12N10_REC,
5008 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5009 wakaba 1.49 rules => FEATURE_M12N10_REC,
5010     summary => FEATURE_M12N10_REC,
5011     width => FEATURE_M12N10_REC,
5012     }),
5013 wakaba 1.40 check_start => sub {
5014     my ($self, $item, $element_state) = @_;
## Initial phase of the child-element state machine.
5015     $element_state->{phase} = 'before caption';
5016 wakaba 1.66
5017     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
5018 wakaba 1.79 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
5019     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
5020 wakaba 1.40 },
5021     check_child_element => sub {
5022     my ($self, $item, $child_el, $child_nsuri, $child_ln,
5023     $child_is_transparent, $element_state) = @_;
## Excluded ("minus") interactive elements are reported and explicitly
## included ("plus") elements are accepted without touching the phase.
## Every other child is matched against the current phase; a child that
## does not fit is reported as 'element not allowed' and the phase is
## left unchanged.
5024 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
5025     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
5026 wakaba 1.40 $self->{onerror}->(node => $child_el,
5027     type => 'element not allowed:minus',
5028 wakaba 1.104 level => $self->{level}->{must});
5029 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
5030     #
5031     } elsif ($element_state->{phase} eq 'in tbodys') {
## Accepts further |tbody| children, or a single trailing |tfoot| when
## no |tfoot| has been seen yet.
5032     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5033     #$element_state->{phase} = 'in tbodys';
5034     } elsif (not $element_state->{has_tfoot} and
5035     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5036     $element_state->{phase} = 'after tfoot';
5037     $element_state->{has_tfoot} = 1;
5038     } else {
5039 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5040     level => $self->{level}->{must});
5041 wakaba 1.40 }
5042     } elsif ($element_state->{phase} eq 'in trs') {
## Accepts further |tr| children, or a single trailing |tfoot| when no
## |tfoot| has been seen yet.
5043     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5044     #$element_state->{phase} = 'in trs';
5045     } elsif (not $element_state->{has_tfoot} and
5046     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5047     $element_state->{phase} = 'after tfoot';
5048     $element_state->{has_tfoot} = 1;
5049     } else {
5050 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5051     level => $self->{level}->{must});
5052 wakaba 1.40 }
5053     } elsif ($element_state->{phase} eq 'after thead') {
## A |tfoot| placed before the body moves to 'in tbodys' with
## |has_tfoot| set, so a second |tfoot| after the body is rejected.
5054     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5055     $element_state->{phase} = 'in tbodys';
5056     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5057     $element_state->{phase} = 'in trs';
5058     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5059     $element_state->{phase} = 'in tbodys';
5060     $element_state->{has_tfoot} = 1;
5061     } else {
5062 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5063     level => $self->{level}->{must});
5064 wakaba 1.40 }
5065     } elsif ($element_state->{phase} eq 'in colgroup') {
5066     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
5067     $element_state->{phase} = 'in colgroup';
5068     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
5069     $element_state->{phase} = 'after thead';
5070     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5071     $element_state->{phase} = 'in tbodys';
5072     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5073     $element_state->{phase} = 'in trs';
5074     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5075     $element_state->{phase} = 'in tbodys';
5076     $element_state->{has_tfoot} = 1;
5077     } else {
5078 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5079     level => $self->{level}->{must});
5080 wakaba 1.40 }
5081     } elsif ($element_state->{phase} eq 'before caption') {
## Same transitions as 'in colgroup', plus |caption|, which is accepted
## only in this initial phase.
5082     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
5083     $element_state->{phase} = 'in colgroup';
5084     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
5085     $element_state->{phase} = 'in colgroup';
5086     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
5087     $element_state->{phase} = 'after thead';
5088     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
5089     $element_state->{phase} = 'in tbodys';
5090     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
5091     $element_state->{phase} = 'in trs';
5092     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
5093     $element_state->{phase} = 'in tbodys';
5094     $element_state->{has_tfoot} = 1;
5095     } else {
5096 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5097     level => $self->{level}->{must});
5098 wakaba 1.40 }
5099     } elsif ($element_state->{phase} eq 'after tfoot') {
## Nothing may follow a trailing |tfoot|.
5100 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5101     level => $self->{level}->{must});
5102 wakaba 1.40 } else {
## An unknown phase is a programming error, not a document error.
5103     die "check_child_element: Bad |table| phase: $element_state->{phase}";
5104     }
5105     },
5106     check_child_text => sub {
5107     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
## Significant text directly inside |table| is not allowed.
5108     if ($has_significant) {
5109 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
5110     level => $self->{level}->{must});
5111 wakaba 1.1 }
5112 wakaba 1.40 },
5113     check_end => sub {
5114     my ($self, $item, $element_state) = @_;
5115 wakaba 1.1
5116     ## Table model errors
## |Whatpm::HTMLTable| is loaded on first use.  The structure returned
## by |form_table| is passed to |assign_header| and then appended to
## |$self->{return}->{table}|.  Errors are forwarded to
## |$self->{onerror}|.
5117     require Whatpm::HTMLTable;
5118 wakaba 1.87 my $table = Whatpm::HTMLTable->form_table ($item->{node}, sub {
5119 wakaba 1.104 $self->{onerror}->(@_);
5120     }, $self->{level});
5121 wakaba 1.87 Whatpm::HTMLTable->assign_header
5122 wakaba 1.104 ($table, $self->{onerror}, $self->{level});
5123 wakaba 1.87 push @{$self->{return}->{table}}, $table;
5124 wakaba 1.1
5125 wakaba 1.40 $HTMLChecker{check_end}->(@_);
5126 wakaba 1.1 },
5127     };
5128    
## Checker definition for the HTML |caption| element.  It copies every
## entry of |%HTMLPhrasingContentChecker| and overrides |status| and
## |check_attrs|.  |align| is checked against four keywords and its
## status is |FEATURE_M12N10_REC_DEPRECATED|.
5129     $Element->{$HTML_NS}->{caption} = {
5130 wakaba 1.40 %HTMLPhrasingContentChecker,
5131 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
5132 wakaba 1.69 check_attrs => $GetHTMLAttrsChecker->({
5133     align => $GetHTMLEnumeratedAttrChecker->({
5134     top => 1, bottom => 1, left => 1, right => 1,
5135     }),
5136     }, {
5137 wakaba 1.49 %HTMLAttrStatus,
5138 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
5139 wakaba 1.49 align => FEATURE_M12N10_REC_DEPRECATED,
5140 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5141 wakaba 1.49 }),
5142 wakaba 1.1 };
5143    
## Attribute checkers for the HTML4 cell alignment attributes
## (|align|, |char|, |charoff|, |valign|).  The hash is spliced into
## the attribute checker tables of |colgroup| and |col|.
5144 wakaba 1.69 my %cellalign = (
5145     ## HTML4 %cellhalign;
5146 wakaba 1.70 align => $GetHTMLEnumeratedAttrChecker->({
5147     left => 1, center => 1, right => 1, justify => 1, char => 1,
5148     }),
5149     char => sub {
5150     my ($self, $attr) = @_;
5151 wakaba 1.69
5152 wakaba 1.70 ## NOTE: "character" or |%Character;| in HTML4.
5153    
## The value must be exactly one character long; any other length
## (including the empty string) is reported as 'char:syntax error'.
5154     my $value = $attr->value;
5155     if (length $value != 1) {
5156     $self->{onerror}->(node => $attr, type => 'char:syntax error',
5157 wakaba 1.105 level => $self->{level}->{html4_fact});
5158 wakaba 1.70 }
5159     },
5160 wakaba 1.86 charoff => $HTMLLengthAttrChecker,
5161    
5162 wakaba 1.69 ## HTML4 %cellvalign;
5163 wakaba 1.70 valign => $GetHTMLEnumeratedAttrChecker->({
5164     top => 1, middle => 1, bottom => 1, baseline => 1,
5165     }),
5166 wakaba 1.69 );
5167    
## Checker definition for the HTML |colgroup| element.  It copies every
## entry of |%HTMLEmptyChecker| and then overrides the attribute
## checker and the child element / child text hooks so that |col|
## children are accepted.
5168 wakaba 1.1 $Element->{$HTML_NS}->{colgroup} = {
5169 wakaba 1.40 %HTMLEmptyChecker,
5170 wakaba 1.153 status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
5171 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
5172 wakaba 1.69 %cellalign,
## The callback returns true only for values greater than zero.
## NOTE(review): presumably it is the range test applied to the parsed
## integer - confirm against |$GetHTMLNonNegativeIntegerAttrChecker|.
5173 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
5174     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
5175     ## TODO: "attribute not supported" if |col|.
5176     ## ISSUE: MUST NOT if any |col|?
5177     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
5178 wakaba 1.49 }, {
5179     %HTMLAttrStatus,
5180 wakaba 1.82 %HTMLM12NXHTML2CommonAttrStatus,
5181 wakaba 1.49 align => FEATURE_M12N10_REC,
5182     char => FEATURE_M12N10_REC,
5183     charoff => FEATURE_M12N10_REC,
5184 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5185     span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
5186 wakaba 1.49 valign => FEATURE_M12N10_REC,
5187     width => FEATURE_M12N10_REC,
5188 wakaba 1.1 }),
5189 wakaba 1.40 check_child_element => sub {
5190     my ($self, $item, $child_el, $child_nsuri, $child_ln,
5191     $child_is_transparent, $element_state) = @_;
## Excluded ("minus") interactive elements are reported; explicitly
## included ("plus") elements and HTML |col| elements are accepted;
## everything else is reported as 'element not allowed'.
5192 wakaba 1.130 if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
5193     $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
5194 wakaba 1.40 $self->{onerror}->(node => $child_el,
5195     type => 'element not allowed:minus',
5196 wakaba 1.104 level => $self->{level}->{must});
5197 wakaba 1.40 } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
5198     #
5199     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
5200     #
5201     } else {
5202 wakaba 1.104 $self->{onerror}->(node => $child_el, type => 'element not allowed',
5203     level => $self->{level}->{must});
5204 wakaba 1.40 }
5205     },
5206     check_child_text => sub {
5207     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
## Significant text directly inside |colgroup| is not allowed.
5208     if ($has_significant) {
5209 wakaba 1.104 $self->{onerror}->(node => $child_node, type => 'character not allowed',
5210     level => $self->{level}->{must});
5211 wakaba 1.1 }
5212     },
5213     };
5214    
## |col| element: content handling comes from |%HTMLEmptyChecker|; only
## the attribute tables are specific to this element.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    ## The callback is true only for values greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign width)),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),
};
5233    
## |tbody| element.  Child elements other than HTML |tr| (and those
## listed in |plus_elements|) are reported, as is significant text.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Explicitly excluded interactive content.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly allowed: nothing to report.
    } elsif ($ns eq $HTML_NS and $ln eq 'tr') {
      ## The only child element of the content model.
    } else {
      $self->{onerror}->(node => $child, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $element_state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $is_significant;
  },
};
5273    
## |thead| and |tfoot| each get their own shallow copy of the |tbody|
## definition.
$Element->{$HTML_NS}->{$_} = {
  %{$Element->{$HTML_NS}->{tbody}},
} for qw(thead tfoot);
5281    
## |tr| element.  Child elements other than HTML |td| / |th| (and those
## listed in |plus_elements|) are reported, as is significant text.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    bgcolor => $HTMLColorAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Explicitly excluded interactive content.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly allowed: nothing to report.
    } elsif ($ns eq $HTML_NS and ($ln eq 'td' or $ln eq 'th')) {
      ## Table cells are the content model of a row.
    } else {
      $self->{onerror}->(node => $child, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $element_state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $is_significant;
  },
};
5324    
## |td| element: content is checked by |%HTMLFlowContentChecker|; the
## tables below list the attribute checkers and attribute statuses.
$Element->{$HTML_NS}->{td} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    ## The span callbacks are true only for values greater than zero.
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    headers => sub {
      ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
      ## Though that method does not check the |headers| attribute of a
      ## |td| element if the element does not form a table, in that case
      ## the |td| element is non-conforming anyway.
    },
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(row col rowgroup colgroup)
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw(bgcolor height nowrap width)),
    (map { ($_ => FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw(abbr axis scope)),
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw(colspan rowspan)),
    headers => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
};
5364    
## |th| element: content is checked by |%HTMLPhrasingContentChecker|; the
## tables below list the attribute checkers and attribute statuses.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    ## The span callbacks are true only for values greater than zero.
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    ## TODO: HTML4(?) |headers|
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    scope => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(row col rowgroup colgroup)
    }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw(bgcolor height nowrap width)),
    (map { ($_ => FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw(abbr axis headers)),
    (map { ($_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC) }
         qw(colspan rowspan scope)),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
};
5399    
## |form| element: flow content in which a nested |form| is added to the
## minus elements for the duration of the element.
$Element->{$HTML_NS}->{form} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    accept => $AcceptAttrChecker,
    'accept-charset' => $HTMLCharsetsAttrChecker,
    action => $HTMLURIAttrChecker, ## TODO: Warn if submission is not defined for the scheme
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    enctype => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } (
        'application/x-www-form-urlencoded',
        'multipart/form-data',
        'text/plain',
      )
    }),
    method => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(get post put delete)
    }),
    name => sub {
      my ($self, $attr) = @_;

      ## Names must be non-empty; names already recorded in
      ## |$self->{form}| are reported as duplicates.
      my $form_name = $attr->value;
      if ($form_name eq '') {
        $self->{onerror}->(type => 'empty form name',
                           node => $attr,
                           level => $self->{level}->{must});
      } elsif ($self->{form}->{$form_name}) {
        $self->{onerror}->(type => 'duplicate form name',
                           node => $attr,
                           value => $form_name,
                           level => $self->{level}->{must});
      } else {
        $self->{form}->{$form_name} = 1;
      }
    },
    novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
    ## TODO: Tests for following attrs:
    onformchange => $HTMLEventHandlerAttrChecker,
    onforminput => $HTMLEventHandlerAttrChecker,
    onreceived => $HTMLEventHandlerAttrChecker,
    replace => $GetHTMLEnumeratedAttrChecker->({
      map { ($_ => 1) } qw(document values)
    }),
    target => $HTMLTargetAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC) }
         qw(action enctype method)),
    ## NOTE: |name| used to be FEATURE_HTML5_DEFAULT |
    ## FEATURE_M12N10_REC_DEPRECATED.
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
         'accept-charset', qw(name onreset onsubmit target)),
    (map { ($_ => FEATURE_WF2) } qw(data onreceived replace)),
    (map { ($_ => FEATURE_WF2_INFORMATIVE) } qw(onformchange onforminput)),
    (map { ($_ => FEATURE_HTML20_RFC) } qw(sdapref sdasuff)),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    novalidate => FEATURE_HTML5_DEFAULT,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {form => 1}});

    ## |action| is flagged as an action URI; the other three attributes
    ## are flagged as resource URIs.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(data template ref);
    $element_state->{id_type} = 'form';
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Undo the minus-element registration made in |check_start|, then
    ## let the flow content checker finish.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
5482    
## |fieldset| element: a |legend| child followed by flow content.
## |$element_state->{has_legend}| records that a |legend| was seen;
## |$element_state->{has_non_legend}| records that something which
## forbids a later |legend| was seen.
$Element->{$HTML_NS}->{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_WF2X) } qw(disabled form)),
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    name => FEATURE_HTML5_DEFAULT,
  }),
  ## NOTE: legend, Flow
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      ## Explicitly excluded interactive content.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly allowed: nothing to report.
    } elsif ($ns eq $HTML_NS and $ln eq 'legend') {
      ## A |legend| is reported once |has_non_legend| has been set, which
      ## includes the case of an earlier |legend|.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:details legend',
                         level => $self->{level}->{must})
          if $element_state->{has_non_legend};
      $element_state->{has_legend} = 1;
      $element_state->{has_non_legend} = 1;
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $is_transparent;
      ## TODO:
      ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
      ## should be an error, since |object| is allowed as flow,
      ## therefore |details| part of the content model does not match.
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $element_state) = @_;
    ## Significant text also closes the window for a |legend|.
    $element_state->{has_non_legend} = 1 if $is_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A |fieldset| without any |legend| child is reported.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'legend',
                       level => $self->{level}->{must})
        unless $element_state->{has_legend};

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
  ## NOTE: This definition is partially reused by |details| element's
  ## checker.
};
5549    
5550     $Element->{$HTML_NS}->{input} = {
5551 wakaba 1.119 %HTMLEmptyChecker,
5552     status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5553 wakaba 1.140 check_attrs => sub {
5554     my ($self, $item, $element_state) = @_;
5555 wakaba 1.142
5556 wakaba 1.145 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5557 wakaba 1.142 $state = 'text' unless defined $state;
5558     $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5559    
5560 wakaba 1.140 for my $attr (@{$item->{node}->attributes}) {
5561     my $attr_ns = $attr->namespace_uri;
5562     $attr_ns = '' unless defined $attr_ns;
5563     my $attr_ln = $attr->manakai_local_name;
5564     my $checker;
5565     my $status;
5566     if ($attr_ns eq '') {
5567     $status =
5568     {
5569     %HTMLAttrStatus,
5570     %HTMLM12NCommonAttrStatus,
5571     accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5572     'accept-charset' => FEATURE_HTML2X_RFC,
5573     accesskey => FEATURE_M12N10_REC,
5574     action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5575     align => FEATURE_M12N10_REC_DEPRECATED,
5576     alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5577     autocomplete => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5578     autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5579     checked => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5580     datafld => FEATURE_HTML4_REC_RESERVED,
5581     dataformatas => FEATURE_HTML4_REC_RESERVED,
5582     datasrc => FEATURE_HTML4_REC_RESERVED,
5583     disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5584     enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5585     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5586 wakaba 1.150 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5587     FEATURE_XHTMLBASIC11_CR,
5588 wakaba 1.140 ismap => FEATURE_M12N10_REC,
5589 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
5590 wakaba 1.140 list => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5591     max => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5592 wakaba 1.150 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X |
5593     FEATURE_M12N10_REC,
5594 wakaba 1.140 method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5595     min => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5596 wakaba 1.156 multiple => FEATURE_HTML5_DEFAULT,
5597 wakaba 1.140 name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5598 wakaba 1.161 novalidate => FEATURE_HTML5_DEFAULT,
5599 wakaba 1.140 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5600     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5601     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5602     onformchange => FEATURE_WF2_INFORMATIVE,
5603     onforminput => FEATURE_WF2_INFORMATIVE,
5604     oninput => FEATURE_WF2,
5605     oninvalid => FEATURE_WF2,
5606     onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5607     pattern => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5608 wakaba 1.156 placeholder => FEATURE_HTML5_DEFAULT,
5609 wakaba 1.140 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5610     replace => FEATURE_WF2,
5611     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5612     sdapref => FEATURE_HTML20_RFC,
5613 wakaba 1.154 size => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5614 wakaba 1.140 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5615     step => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5616     tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5617     target => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
5618 wakaba 1.161 template => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO:dropped
5619 wakaba 1.140 type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5620     usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
5621     value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5622     }->{$attr_ln};
5623    
5624     $checker =
5625     {
5626 wakaba 1.141 ## NOTE: Value of an empty string means that the attribute is only
5627     ## applicable for a specific set of states.
5628 wakaba 1.142 accept => '',
5629 wakaba 1.149 'accept-charset' => $HTMLCharsetsAttrChecker,
5630     ## NOTE: To which states it applies is not defined in RFC 2070.
5631 wakaba 1.150 accesskey => '', ## NOTE: Not applied to |hidden| [WF2].
5632 wakaba 1.142 action => '',
5633 wakaba 1.150 align => '',
5634 wakaba 1.141 alt => '',
5635 wakaba 1.142 autocomplete => '',
5636 wakaba 1.165 autofocus => $AutofocusAttrChecker,
5637     ## NOTE: <input type=hidden disabled> is not disallowed.
5638 wakaba 1.142 checked => '',
5639     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
5640 wakaba 1.165 ## NOTE: <input type=hidden disabled> is not disallowed.
5641 wakaba 1.142 enctype => '',
5642     form => $HTMLFormAttrChecker,
5643 wakaba 1.150 inputmode => '',
5644     ismap => '', ## NOTE: "MUST" be type=image [HTML4]
5645 wakaba 1.142 list => '',
5646     max => '',
5647     maxlength => '',
5648     method => '',
5649     min => '',
5650 wakaba 1.156 multiple => '',
5651 wakaba 1.165 name => $FormControlNameAttrChecker,
5652 wakaba 1.166 novalidate => '',
5653 wakaba 1.149 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
5654     onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
5655     oninput => $HTMLEventHandlerAttrChecker, # [WF2]
5656     oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
5657     ## TODO: tests for four attributes above
5658 wakaba 1.142 pattern => '',
5659 wakaba 1.156 placeholder => '',
5660 wakaba 1.142 readonly => '',
5661 wakaba 1.150 replace => '',
5662 wakaba 1.142 required => '',
5663     size => '',
5664     src => '',
5665     step => '',
5666     target => '',
5667 wakaba 1.140 type => $GetHTMLEnumeratedAttrChecker->({
5668 wakaba 1.156 hidden => 1, text => 1, search => 1, url => 1,
5669     email => 1, password => 1,
5670 wakaba 1.141 datetime => 1, date => 1, month => 1, week => 1, time => 1,
5671 wakaba 1.157 'datetime-local' => 1, number => 1, range => 1, color => 1,
5672     checkbox => 1,
5673 wakaba 1.141 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
5674     button => 1,
5675 wakaba 1.140 }),
5676 wakaba 1.151 usemap => '',
5677 wakaba 1.142 value => '',
5678 wakaba 1.140 }->{$attr_ln};
5679 wakaba 1.141
5680     ## State-dependent checkers
5681     unless ($checker) {
5682     if ($state eq 'hidden') {
5683     $checker =
5684     {
5685 wakaba 1.142 value => sub {
5686     my ($self, $attr, $item, $element_state) = @_;
5687 wakaba 1.145 my $name = $item->{node}->get_attribute_ns (undef, 'name');
5688 wakaba 1.142 if (defined $name and $name eq '_charset_') { ## case-sensitive
5689     $self->{onerror}->(node => $attr,
5690     type => '_charset_ value',
5691     level => $self->{level}->{must});
5692     }
5693     },
5694 wakaba 1.141 }->{$attr_ln} || $checker;
5695 wakaba 1.142 ## TODO: Warn if no name attribute?
5696     ## TODO: Warn if name!=_charset_ and no value attribute?
5697 wakaba 1.168 } elsif ({
5698     datetime => 1, date => 1, month => 1, time => 1,
5699     week => 1, 'datetime-local' => 1,
5700     }->{$state}) {
5701     my $v = {
5702     datetime => ['global_date_and_time_string'],
5703     date => ['date_string'],
5704     month => ['month_string'],
5705     week => ['week_string'],
5706     time => ['time_string'],
5707     'datetime-local' => ['local_date_and_time_string'],
5708     }->{$state};
5709 wakaba 1.144 $checker =
5710     {
5711 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5712 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5713     on => 1, off => 1,
5714     }),
5715 wakaba 1.158 list => $ListAttrChecker,
5716 wakaba 1.168 min => $GetDateTimeAttrChecker->($v->[0]),
5717     max => $GetDateTimeAttrChecker->($v->[0]),
5718 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5719 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5720 wakaba 1.148 step => $StepAttrChecker,
5721 wakaba 1.168 value => $GetDateTimeAttrChecker->($v->[0]),
5722 wakaba 1.144 }->{$attr_ln} || $checker;
5723     } elsif ($state eq 'number') {
5724     $checker =
5725     {
5726 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5727 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5728     on => 1, off => 1,
5729     }),
5730 wakaba 1.158 list => $ListAttrChecker,
5731 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5732     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5733 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5734 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5735 wakaba 1.148 step => $StepAttrChecker,
5736 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5737 wakaba 1.144 }->{$attr_ln} || $checker;
5738     } elsif ($state eq 'range') {
5739     $checker =
5740     {
5741 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5742 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5743     on => 1, off => 1,
5744     }),
5745 wakaba 1.158 list => $ListAttrChecker,
5746 wakaba 1.144 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5747     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5748 wakaba 1.148 step => $StepAttrChecker,
5749 wakaba 1.168 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
5750 wakaba 1.144 }->{$attr_ln} || $checker;
5751 wakaba 1.157 } elsif ($state eq 'color') {
5752     $checker =
5753     {
5754     accesskey => $HTMLAccesskeyAttrChecker,
5755     autocomplete => $GetHTMLEnumeratedAttrChecker->({
5756     on => 1, off => 1,
5757     }),
5758 wakaba 1.158 list => $ListAttrChecker,
5759 wakaba 1.157 value => sub {
5760     my ($self, $attr) = @_;
5761     unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
5762     $self->{onerror}->(node => $attr,
5763     type => 'scolor:syntax error', ## TODOC: type
5764     level => $self->{level}->{must});
5765     }
5766     },
5767     }->{$attr_ln} || $checker;
5768 wakaba 1.144 } elsif ($state eq 'checkbox' or $state eq 'radio') {
5769     $checker =
5770     {
5771 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5772 wakaba 1.149 checked => $GetHTMLBooleanAttrChecker->('checked'),
5773     ## ISSUE: checked value not (yet?) defined.
5774     ## TODO: tests
5775 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5776 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5777     }->{$attr_ln} || $checker;
5778     ## TODO: There MUST be another input type=radio with same
5779     ## name (Radio state).
5780     ## ISSUE: There should be exactly one type=radio with checked?
5781     } elsif ($state eq 'file') {
5782     $checker =
5783     {
5784 wakaba 1.161 accept => $AcceptAttrChecker,
5785 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5786 wakaba 1.168 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
5787 wakaba 1.159 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
5788 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5789 wakaba 1.144 }->{$attr_ln} || $checker;
5790     } elsif ($state eq 'submit') {
5791     $checker =
5792     {
5793 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5794 wakaba 1.149 action => $HTMLURIAttrChecker,
5795 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5796     'application/x-www-form-urlencoded' => 1,
5797     'multipart/form-data' => 1,
5798     'text/plain' => 1,
5799     }),
5800 wakaba 1.149 method => $GetHTMLEnumeratedAttrChecker->({
5801     get => 1, post => 1, put => 1, delete => 1,
5802     }),
5803 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5804 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5805     document => 1, values => 1,
5806     }),
5807     target => $HTMLTargetAttrChecker,
5808 wakaba 1.144 value => sub { }, ## NOTE: No restriction.
5809     }->{$attr_ln} || $checker;
5810     } elsif ($state eq 'image') {
5811     $checker =
5812     {
5813 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5814 wakaba 1.149 action => $HTMLURIAttrChecker,
5815     align => $GetHTMLEnumeratedAttrChecker->({
5816     top => 1, middle => 1, bottom => 1, left => 1, right => 1,
5817     }),
5818 wakaba 1.144 alt => sub {
5819     my ($self, $attr) = @_;
5820     my $value = $attr->value;
5821     unless (length $value) {
5822     $self->{onerror}->(node => $attr,
5823     type => 'empty anchor image alt',
5824     level => $self->{level}->{must});
5825     }
5826     },
5827 wakaba 1.166 enctype => $GetHTMLEnumeratedAttrChecker->({
5828     'application/x-www-form-urlencoded' => 1,
5829     'multipart/form-data' => 1,
5830     'text/plain' => 1,
5831     }),
5832 wakaba 1.149 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
5833     method => $GetHTMLEnumeratedAttrChecker->({
5834     get => 1, post => 1, put => 1, delete => 1,
5835     }),
5836 wakaba 1.166 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
5837 wakaba 1.149 replace => $GetHTMLEnumeratedAttrChecker->({
5838     document => 1, values => 1,
5839     }),
5840 wakaba 1.144 src => $HTMLURIAttrChecker,
5841     ## TODO: There is requirements on the referenced resource.
5842 wakaba 1.149 target => $HTMLTargetAttrChecker,
5843     usemap => $HTMLUsemapAttrChecker,
5844 wakaba 1.144 }->{$attr_ln} || $checker;
5845     ## TODO: alt & src are required.
5846     } elsif ({
5847     reset => 1, button => 1,
5848     ## NOTE: From Web Forms 2.0:
5849     remove => 1, 'move-up' => 1, 'move-down' => 1,
5850     add => 1,
5851     }->{$state}) {
5852     $checker =
5853     {
5854 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5855 wakaba 1.144 ## NOTE: According to Web Forms 2.0, |input| attribute
5856     ## has |template| attribute to support the |add| button
5857     ## type (as part of the repetition template feature). It
5858     ## conflicts with the |template| global attribute
5859     ## introduced as part of the data template feature.
5860     ## NOTE: |template| attribute as defined in Web Forms 2.0
5861     ## has no author requirement.
5862     value => sub { }, ## NOTE: No restriction.
5863     }->{$attr_ln} || $checker;
5864 wakaba 1.156 } else { # Text, Search, E-mail, URL, Password
5865 wakaba 1.141 $checker =
5866     {
5867 wakaba 1.150 accesskey => $HTMLAccesskeyAttrChecker,
5868 wakaba 1.145 autocomplete => $GetHTMLEnumeratedAttrChecker->({
5869     on => 1, off => 1,
5870     }),
5871 wakaba 1.149 ## TODO: inputmode [WF2]
5872 wakaba 1.158 list => $ListAttrChecker,
maxlength => sub {
  ## Checker for |maxlength| on text-like |input| types.  First runs
  ## the generic non-negative integer check, then reports a must-level
  ## 'value too long' error on the |value| attribute if that attribute
  ## is longer than the parsed limit.
  my ($self, $attr, $item, $element_state) = @_;
  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);

  ## NOTE: Applying the rules for parsing non-negative integers
  ## results in a number only if the value matches this pattern.
  return unless $attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/;
  my $limit = 0+$1;

  my $current = $item->{node}->get_attribute_ns (undef, 'value');
  return unless defined $current;
  return unless length ($current) > $limit;

  my $value_attr = $item->{node}->get_attribute_node_ns (undef, 'value');
  $self->{onerror}->(node => $value_attr,
                     type => 'value too long',
                     level => $self->{level}->{must});
},
5897 wakaba 1.160 pattern => $PatternAttrChecker,
placeholder => sub {
  ## Checker for |placeholder|: reports a must-level error when the
  ## attribute value contains a CR or LF character.
  my ($self, $attr) = @_;
  return unless $attr->value =~ /[\x0D\x0A]/;
  $self->{onerror}->(node => $attr,
                     type => 'newline in value', ## TODOC: type
                     level => $self->{level}->{must});
},
5906 wakaba 1.145 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
5907 wakaba 1.146 required => $GetHTMLBooleanAttrChecker->('required'),
5908 wakaba 1.147 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
value => sub {
  ## Checker for |value| on the text-like |input| types.  What is
  ## checked depends on the current input type ($state):
  ##   url   - delegated to the generic URI attribute checker;
  ##   email - one address, or a comma-separated list of addresses
  ##           when the |multiple| attribute is present;
  ##   other - the value must not contain CR or LF.
  my ($self, $attr, $item, $element_state) = @_;
  if ($state eq 'url') {
    $HTMLURIAttrChecker->(@_);
  } elsif ($state eq 'email') {
    if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
      ## A negative limit keeps trailing empty fields, so that
      ## |a@example.com,| is reported as containing an empty address.
      my @addr = split /,/, $attr->value, -1;
      ## |split| returns an empty list for the empty string; treat it
      ## as a single (invalid) empty address.
      @addr = ('') unless @addr;
      for (@addr) {
        ## Strip space characters around each address.  Both ends are
        ## stripped as runs (the trailing pattern used to lack |+| and
        ## therefore removed only one character).
        s/\A[\x09\x0A\x0C\x0D\x20]+//;
        s/[\x09\x0A\x0C\x0D\x20]+\z//;
        unless (/\A$ValidEmailAddress\z/) {
          $self->{onerror}->(node => $attr,
                             type => 'email:syntax error', ## TODO: type
                             value => $_,
                             level => $self->{level}->{must});
        }
      }
    } else {
      unless ($attr->value =~ /\A$ValidEmailAddress\z/) {
        $self->{onerror}->(node => $attr,
                           type => 'email:syntax error', ## TODO: type
                           level => $self->{level}->{must});
      }
    }
  } else {
    if ($attr->value =~ /[\x0D\x0A]/) {
      $self->{onerror}->(node => $attr,
                         type => 'newline in value', ## TODO: type
                         level => $self->{level}->{must});
    }
  }
},
5942 wakaba 1.141 }->{$attr_ln} || $checker;
5943 wakaba 1.147 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
5944 wakaba 1.156 $checker = $GetHTMLBooleanAttrChecker->('multiple')
5945     if $state eq 'email' and $attr_ln eq 'multiple';
5946 wakaba 1.161
5947     if ($item->{node}->has_attribute_ns (undef, 'pattern') and
5948     not $item->{node}->has_attribute_ns (undef, 'title')) {
5949     $self->{onerror}->(node => $item->{node},
5950     type => 'attribute missing',
5951     text => 'title',
5952     level => $self->{level}->{should});
5953     }
5954 wakaba 1.141 }
5955     }
5956    
5957     if (defined $checker) {
5958     if ($checker eq '') {
5959     $checker = sub {
5960     my ($self, $attr) = @_;
5961     $self->{onerror}->(node => $attr,
5962     type => 'input attr not applicable',
5963     text => $state,
5964     level => $self->{level}->{must});
5965     };
5966     }
5967 wakaba 1.140 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
5968     $attr_ln !~ /[A-Z]/) {
5969     $checker = $HTMLDatasetAttrChecker;
5970     $status = $HTMLDatasetAttrStatus;
5971     } else {
5972     $checker = $HTMLAttrChecker->{$attr_ln};
5973     }
5974     }
5975     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
5976     || $AttrChecker->{$attr_ns}->{''};
5977     $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
5978     || $AttrStatus->{$attr_ns}->{''};
5979     $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
5980 wakaba 1.157
5981     ## TODOC: accesskey="" is also applied to type=search and type=color
5982 wakaba 1.140
5983     if ($checker) {
5984     $checker->($self, $attr, $item, $element_state) if ref $checker;
5985     } elsif ($attr_ns eq '' and not $status) {
5986     #
5987     } else {
5988     $self->{onerror}->(node => $attr,
5989     type => 'unknown attribute',
5990     level => $self->{level}->{uncertain});
5991     ## ISSUE: No conformance criteria for unknown attributes in the spec
5992     }
5993    
5994     $self->_attr_status_info ($attr, $status);
5995     }
5996 wakaba 1.168
5997     ## ISSUE: -0/+0
5998    
5999     if ($state eq 'range') {
6000     $element_state->{number_value}->{min} ||= 0;
6001     $element_state->{number_value}->{max} = 100
6002     unless defined $element_state->{number_value}->{max};
6003     }
6004    
6005     if (defined $element_state->{date_value}->{min} or
6006     defined $element_state->{date_value}->{max}) {
6007     my $min_value = $element_state->{date_value}->{min};
6008     my $max_value = $element_state->{date_value}->{max};
6009     my $value_value = $element_state->{date_value}->{value};
6010    
6011     if (defined $min_value and $min_value eq '' and
6012     (defined $max_value or defined $value_value)) {
6013     my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6014     $self->{onerror}->(node => $min,
6015     type => 'date value not supported', ## TODOC: type
6016     value => $min->value,
6017     level => $self->{level}->{unsupported});
6018     undef $min_value;
6019     }
## An unsupported |max| date (apparently represented by an empty
## string) is only reported when there is another value to compare it
## with, i.e. |min| or |value|.  The condition used to test
## |defined $max_value| again, which is always true at this point.
## NOTE: $min_value may already have been cleared by the preceding
## check if it was itself unsupported.
if (defined $max_value and $max_value eq '' and
    (defined $min_value or defined $value_value)) {
  my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
  $self->{onerror}->(node => $max,
                     type => 'date value not supported', ## TODOC: type
                     value => $max->value,
                     level => $self->{level}->{unsupported});
  undef $max_value;
}
6029     if (defined $value_value and $value_value eq '' and
6030     (defined $max_value or defined $min_value)) {
6031     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6032     $self->{onerror}->(node => $value,
6033     type => 'date value not supported', ## TODOC: type
6034     value => $value->value,
6035     level => $self->{level}->{unsupported});
6036     undef $value_value;
6037     }
6038    
6039     if (defined $min_value and defined $max_value) {
6040     if ($min_value->to_html5_number > $max_value->to_html5_number) {
6041     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6042     $self->{onerror}->(node => $max,
6043     type => 'max lt min', ## TODOC: type
6044     level => $self->{level}->{must});
6045     }
6046     }
6047    
6048     if (defined $min_value and defined $value_value) {
6049     if ($min_value->to_html5_number > $value_value->to_html5_number) {
6050     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6051     $self->{onerror}->(node => $value,
6052     type => 'value lt min', ## TODOC: type
6053     level => $self->{level}->{warn});
6054     ## NOTE: Not an error.
6055     }
6056     }
6057    
6058     if (defined $max_value and defined $value_value) {
6059     if ($max_value->to_html5_number < $value_value->to_html5_number) {
6060     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6061     $self->{onerror}->(node => $value,
6062     type => 'value gt max', ## TODOC: type
6063     level => $self->{level}->{warn});
6064     ## NOTE: Not an error.
6065     }
6066     }
6067     } elsif (defined $element_state->{number_value}->{min} or
6068     defined $element_state->{number_value}->{max}) {
6069     my $min_value = $element_state->{number_value}->{min};
6070     my $max_value = $element_state->{number_value}->{max};
6071     my $value_value = $element_state->{number_value}->{value};
6072    
6073     if (defined $min_value and defined $max_value) {
6074     if ($min_value > $max_value) {
6075     my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6076     $self->{onerror}->(node => $max,
6077     type => 'max lt min', ## TODOC: type
6078     level => $self->{level}->{must});
6079     }
6080     }
6081    
6082     if (defined $min_value and defined $value_value) {
6083     if ($min_value > $value_value) {
6084     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6085     $self->{onerror}->(node => $value,
6086     type => 'value lt min', ## TODOC: type
6087     level => $self->{level}->{warn});
6088     ## NOTE: Not an error.
6089     }
6090     }
6091    
6092     if (defined $max_value and defined $value_value) {
6093     if ($max_value < $value_value) {
6094     my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6095     $self->{onerror}->(node => $value,
6096     type => 'value gt max', ## TODOC: type
6097     level => $self->{level}->{warn});
6098     ## NOTE: Not an error.
6099     }
6100     }
6101     }
6102 wakaba 1.150
6103 wakaba 1.168 ## TODO: Warn unless value = min * x where x is an integer.
6104    
6105 wakaba 1.150 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6106     $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6107     $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6108     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6109     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6110 wakaba 1.140 },
check_start => sub {
  ## Called at the start of the element.  If both the |has_label| and
  ## |has_labelable| flags are already set, this element is reported
  ## as an additional labelable element; otherwise |has_labelable| is
  ## set to 2.  The element's ID type is recorded as 'labelable'.
  my ($self, $item, $element_state) = @_;

  if (!$self->{flag}->{has_label} || !$self->{flag}->{has_labelable}) {
    $self->{flag}->{has_labelable} = 2;
  } else {
    $self->{onerror}->(node => $item->{node},
                       type => 'multiple labelable fae',
                       level => $self->{level}->{must});
  }

  $element_state->{id_type} = 'labelable';
},
6123 wakaba 1.52 };
6124    
6125 wakaba 1.56 ## TODO: Form |name| attributes: MUST NOT conflict with RFC 3106 [WF2]
6126    
6127 wakaba 1.80 ## NOTE: "authors who are nesting repetition blocks should position such
6128     ## [repetition-block-related] buttons carefully to make clear which block a
6129 wakaba 1.150 ## button applies to." [WF2]: I have no idea how this can be tested.
6130 wakaba 1.80
## The |button| element.  The definition starts from
## %HTMLPhrasingContentChecker and adds or overrides the entries below.
$Element->{$HTML_NS}->{button} = {
  %HTMLPhrasingContentChecker, ## ISSUE: -interactive?
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => checker
    {
      accesskey    => $HTMLAccesskeyAttrChecker,
      ## ISSUE: In HTML5, no "MUST NOT" for using |action|, |method|,
      ## |enctype|, |target|, and |novalidate| with non-|submit|-|type|
      ## |button| elements.
      action       => $HTMLURIAttrChecker,
      autofocus    => $AutofocusAttrChecker,
      disabled     => $GetHTMLBooleanAttrChecker->('disabled'),
      enctype      => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw(
          application/x-www-form-urlencoded
          multipart/form-data
          text/plain
        )
      }),
      form         => $HTMLFormAttrChecker,
      method       => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw(get post put delete)
      }),
      name         => $FormControlNameAttrChecker,
      novalidate   => $GetHTMLBooleanAttrChecker->('novalidate'),
      onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
      onforminput  => $HTMLEventHandlerAttrChecker, ## TODO: tests
      replace      => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw(document values)
      }),
      target       => $HTMLTargetAttrChecker,
      ## NOTE: According to Web Forms 2.0, the |button| element has a
      ## |template| attribute to support the |add| button type (as part
      ## of the repetition template feature).  It conflicts with the
      ## |template| global attribute introduced as part of the data
      ## template feature.
      ## NOTE: The |template| attribute as defined in Web Forms 2.0 has
      ## no author requirement.
      type         => $GetHTMLEnumeratedAttrChecker->({
        map { $_ => 1 } qw(button submit reset)
      }),
      value        => sub {}, ## NOTE: No restriction.
    },
    ## Attribute name => feature status
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey    => FEATURE_M12N10_REC,
      action       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      autofocus    => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      datafld      => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc      => FEATURE_HTML4_REC_RESERVED,
      disabled     => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      enctype      => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      form         => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      method       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      name         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      novalidate   => FEATURE_HTML5_DEFAULT,
      onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onformchange => FEATURE_WF2_INFORMATIVE,
      onforminput  => FEATURE_WF2_INFORMATIVE,
      replace      => FEATURE_WF2,
      tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target       => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      template     => FEATURE_HTML5_AT_RISK | FEATURE_WF2, ## TODO: dropped
      type         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      value        => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Report this element if the |has_label| and |has_labelable|
    ## flags are both already set; otherwise set |has_labelable| to 2.
    if (!$self->{flag}->{has_label} || !$self->{flag}->{has_labelable}) {
      $self->{flag}->{has_labelable} = 2;
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}->{must});
    }

    ## ISSUE: "The value attribute must not be present unless the form
    ## [content] attribute is present.": Wrong?

    ## Register the kinds of the URI-valued attributes.
    $element_state->{uri_info}->{action}->{type}->{action} = 1;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(datasrc template ref);

    $element_state->{id_type} = 'labelable';
  },
};
6215    
## The |label| element.  The definition starts from
## %HTMLPhrasingContentChecker; |check_start| and |check_end| maintain
## the |has_label| / |has_labelable| flags consulted by the labelable
## form controls.
$Element->{$HTML_NS}->{label} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC
      | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => checker
    {
      accesskey => $HTMLAccesskeyAttrChecker,
      for       => sub {
        my ($self, $attr) = @_;

        ## NOTE: MUST be an ID of a labelable element.  The reference
        ## is only queued in |$self->{idref}| here.
        my $reference = ['labelable', $attr->value, $attr];
        push @{$self->{idref}}, $reference;
      },
      form      => $HTMLFormAttrChecker,
    },
    ## Attribute name => feature status
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      accesskey => FEATURE_WF2 | FEATURE_M12N10_REC,
      for       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      form      => FEATURE_HTML5_DEFAULT,
      lang      => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      onblur    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus   => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## A |label| may not contain another |label|.
    $self->_add_minus_elements ($element_state, {$HTML_NS => {label => 1}});

    ## Remember the outer flag values so that |check_end| can restore
    ## them, then install the values for this |label|.
    $element_state->{has_label_original} = $self->{flag}->{has_label};
    $self->{flag}->{has_label} = 1;
    $element_state->{has_labelable_original} = $self->{flag}->{has_labelable};
    if ($item->{node}->has_attribute_ns (undef, 'for')) {
      $self->{flag}->{has_labelable} = 1;
    } else {
      $self->{flag}->{has_labelable} = 0;
    }

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## |has_labelable| is still 1 when the |label| has for="" but no
    ## labelable element was encountered inside it.
    if ($self->{flag}->{has_labelable} == 1) {
      $self->{flag}->{has_labelable}
          = $element_state->{has_labelable_original};
    }
    if (not $element_state->{has_label_original}) {
      delete $self->{flag}->{has_label};
    }
    ## TODO: Warn if no labelable descendant? <input type=hidden>?

    ## NOTE: |<label for=a><input id=a></label>| is non-conforming.

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
  ## TODO: Tests for <nest/> in <label>
};
6271    
## The |select| element.  The definition starts from %HTMLChecker; its
## content model accepts only |option| and |optgroup| child elements
## and no significant text.
$Element->{$HTML_NS}->{select} = {
  %HTMLChecker,
  ## ISSUE: HTML5 has no requirement like these:
  ## TODO: The author should mark at least one |option| as |selected|
  ## in the non-|multiple| case [HTML4].
  ## TODO: More than one |selected| |option| in the non-|multiple|
  ## case is an "error" [HTML4].
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => checker
    {
      accesskey    => $HTMLAccesskeyAttrChecker,
      autofocus    => $AutofocusAttrChecker,
      disabled     => $GetHTMLBooleanAttrChecker->('disabled'),
      data         => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      form         => $HTMLFormAttrChecker,
      multiple     => $GetHTMLBooleanAttrChecker->('multiple'),
      name         => $FormControlNameAttrChecker,
      ## TODO: tests for on*
      onformchange => $HTMLEventHandlerAttrChecker,
      onforminput  => $HTMLEventHandlerAttrChecker,
      oninput      => $HTMLEventHandlerAttrChecker,
      oninvalid    => $HTMLEventHandlerAttrChecker,
      size         => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    },
    ## Attribute name => feature status
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey    => FEATURE_WF2,
      autofocus    => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      data         => FEATURE_WF2,
      datafld      => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc      => FEATURE_HTML4_REC_RESERVED,
      disabled     => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      form         => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      lang         => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      multiple     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      name         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onblur       => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onchange     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onformchange => FEATURE_WF2_INFORMATIVE,
      onforminput  => FEATURE_WF2_INFORMATIVE,
      onfocus      => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      oninput      => FEATURE_WF2,
      oninvalid    => FEATURE_WF2,
      sdaform      => FEATURE_HTML20_RFC,
      sdapref      => FEATURE_HTML20_RFC,
      size         => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      tabindex     => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Report this element if the |has_label| and |has_labelable|
    ## flags are both already set; otherwise set |has_labelable| to 2.
    if (!$self->{flag}->{has_label} || !$self->{flag}->{has_labelable}) {
      $self->{flag}->{has_labelable} = 2;
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'multiple labelable fae',
                         level => $self->{level}->{must});
    }

    ## Register the kinds of the URI-valued attributes.
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(data datasrc template ref);

    $element_state->{id_type} = 'labelable';
  },
  check_child_element => sub {
    ## NOTE: (option | optgroup)*

    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($child_nsuri eq $HTML_NS and
             ($child_ln eq 'option' or $child_ln eq 'optgroup')) {
      ## Part of the content model.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only significant text is an error.
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
6366 wakaba 1.1
## The |datalist| element.  Its content is either phrasing content or
## a sequence of |option| elements; the |phase| entry of the element
## state records which of the two has been seen so far.
$Element->{$HTML_NS}->{datalist} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => checker
    {
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    },
    ## Attribute name => feature status
    {
      %HTMLAttrStatus,
      data => FEATURE_WF2,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{phase} = 'any'; # any | phrasing | option

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(data template ref);

    $element_state->{id_type} = 'datalist';
  },
  ## NOTE: phrasing | option*
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Reports a must-level error of the given type on the child.
    my $report = sub {
      my ($type) = @_;
      $self->{onerror}->(node => $child_el,
                         type => $type,
                         level => $self->{level}->{must});
    };
    my $is_option = sub {
      $child_nsuri eq $HTML_NS and $child_ln eq 'option';
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $report->('element not allowed:minus');
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($element_state->{phase} eq 'phrasing') {
      $report->('element not allowed:phrasing')
          unless $HTMLPhrasingContent->{$child_nsuri}->{$child_ln};
    } elsif ($element_state->{phase} eq 'option') {
      $report->('element not allowed') unless $is_option->();
    } elsif ($element_state->{phase} eq 'any') {
      ## The first acceptable child decides the phase.
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{phase} = 'phrasing';
      } elsif ($is_option->()) {
        $element_state->{phase} = 'option';
      } else {
        $report->('element not allowed');
      }
    } else {
      die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    if ($element_state->{phase} eq 'any') {
      ## Significant text selects the phrasing alternative.
      $element_state->{phase} = 'phrasing';
    } elsif ($element_state->{phase} ne 'phrasing') {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    unless ($element_state->{phase} eq 'phrasing') {
      ## NOTE: Since the content model explicitly allows a |datalist|
      ## element being empty, we don't raise "no significant content"
      ## error for this element when there is no element.  (We should
      ## raise an error for |<datalist><br></datalist>|, however.)
      ## NOTE: As a side-effect, when the |datalist| element only
      ## contains non-conforming content, the |phase| flag has not
      ## changed from |any| and no "no significant content" error is
      ## raised either.
      return $HTMLChecker{check_end}->(@_);
    }

    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{transparent}) {
      ## Nothing to report for a transparent item.
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'no significant content',
                         level => $self->{level}->{should});
    }
  },
};
6466 wakaba 1.49
## The |optgroup| element.  The definition starts from %HTMLChecker;
## the |label| attribute is required and only |option| child elements
## are accepted.
$Element->{$HTML_NS}->{optgroup} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => checker
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      label    => sub {},
    },
    ## Attribute name => feature status
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      label    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    ## A missing |label| attribute is a must-level error.
    if (not $item->{node}->has_attribute_ns (undef, 'label')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'label',
                         level => $self->{level}->{must});
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
      ## Part of the content model.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only significant text is an error.
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
6515    
## The |option| element.  The definition starts from %HTMLTextChecker
## and only adds the element status and the attribute tables.
$Element->{$HTML_NS}->{option} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute name => checker
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      label    => sub {}, ## NOTE: No restriction.
      ## ISSUE: Not a "boolean attribute"
      selected => $GetHTMLBooleanAttrChecker->('selected'),
      value    => sub {}, ## NOTE: No restriction.
    },
    ## Attribute name => feature status
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      label    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang     => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      sdaform  => FEATURE_HTML20_RFC,
      sdapref  => FEATURE_HTML20_RFC,
      selected => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      value    => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
};
6536 wakaba 1.49
6537 wakaba 1.52 $Element->{$HTML_NS}->{textarea} = {
6538     %HTMLTextChecker,
6539 wakaba 1.121 status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6540 wakaba 1.52 check_attrs => $GetHTMLAttrsChecker->({
6541 wakaba 1.164 accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
6542 wakaba 1.66 accesskey => $HTMLAccesskeyAttrChecker,
6543 wakaba 1.165 autofocus => $AutofocusAttrChecker,
6544 wakaba 1.164 cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6545 wakaba 1.52 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6546 wakaba 1.136 form => $HTMLFormAttrChecker,
6547 wakaba 1.56 ## TODO: inputmode [WF2]
6548 wakaba 1.164 maxlength => sub {
6549     my ($self, $attr, $item, $element_state) = @_;
6550    
6551     $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
6552    
6553 wakaba 1.165 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
6554 wakaba 1.164 ## NOTE: Applying the rules for parsing non-negative integers
6555     ## results in a number.
6556     my $max_allowed_value_length = 0+$1;
6557    
6558     ## ISSUE: "The the purposes of this requirement," (typo)
6559    
6560     ## ISSUE: This constraint is applied w/o CRLF normalization to
6561     ## |value| attribute, but w/ CRLF normalization to
6562     ## concept-value.
6563     my $value = $item->{node}->text_content;
6564     if (defined $value) {
6565     my $codepoint_length = length $value;
6566    
6567     if ($codepoint_length > $max_allowed_value_length) {
6568     $self->{onerror}->(node => $item->{node},
6569     type => 'value too long',
6570     level => $self->{level}->{must});
6571     }
6572     }
6573     }
6574     },
6575 wakaba 1.165 name => $FormControlNameAttrChecker,
6576 wakaba 1.164 onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
6577     onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6578     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6579 wakaba 1.161 pattern => $PatternAttrChecker,
6580 wakaba 1.52 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6581 wakaba 1.56 required => $GetHTMLBooleanAttrChecker->('required'),
6582 wakaba 1.164 rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
6583     oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
6584     oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
6585 wakaba 1.161 ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
6586 wakaba 1.56 wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
6587 wakaba 1.52 }, {
6588     %HTMLAttrStatus,
6589     %HTMLM12NCommonAttrStatus,
6590 wakaba 1.164 accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6591 wakaba 1.61 'accept-charset' => FEATURE_HTML2X_RFC,
6592 wakaba 1.52 accesskey => FEATURE_M12N10_REC,
6593 wakaba 1.121 autofocus => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6594     cols => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6595 wakaba 1.52 datafld => FEATURE_HTML4_REC_RESERVED,
6596 wakaba 1.49 dataformatas => FEATURE_HTML4_REC_RESERVED,
6597     datasrc => FEATURE_HTML4_REC_RESERVED,
6598 wakaba 1.121 disabled => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6599     form => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6600 wakaba 1.164 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
6601 wakaba 1.153 lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
6602 wakaba 1.121 maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6603     name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6604 wakaba 1.52 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6605     onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6606     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6607 wakaba 1.164 onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6608     onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
6609     oninput => FEATURE_WF2, ## TODO: tests
6610     oninvalid => FEATURE_WF2, ## TODO: tests
6611 wakaba 1.52 onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6612 wakaba 1.161 pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6613 wakaba 1.121 readonly => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
6614     required => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6615     rows => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6616 wakaba 1.61 sdaform => FEATURE_HTML20_RFC,
6617     sdapref => FEATURE_HTML20_RFC,
6618 wakaba 1.52 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6619 wakaba 1.121 wrap => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
6620 wakaba 1.52 }),
6621 wakaba 1.66 check_start => sub {
6622     my ($self, $item, $element_state) = @_;
6623 wakaba 1.139 if ($self->{flag}->{has_label} and $self->{flag}->{has_labelable}) {
6624     $self->{onerror}->(node => $item->{node},
6625     type => 'multiple labelable fae',
6626     level => $self->{level}->{must});
6627     } else {
6628     $self->{flag}->{has_labelable} = 2;
6629     }
6630 wakaba 1.164
6631     $element_state->{uri_info}->{data}->{type}->{resource} = 1;
6632     $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6633     $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6634    
6635     $element_state->{id_type} = 'labelable';
6636     },
6637     check_attrs2 => sub {
6638     my ($self, $item, $element_state) = @_;
6639 wakaba 1.66
6640 wakaba 1.161 if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6641     not $item->{node}->has_attribute_ns (undef, 'title')) {
6642     ## NOTE: WF2 (dropped by HTML5)
6643     $self->{onerror}->(node => $item->{node},
6644     type => 'attribute missing',
6645     text => 'title',
6646     level => $self->{level}->{should});
6647     }
6648    
6649 wakaba 1.164 unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
6650     my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
6651     if (defined $wrap) {
6652     $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
6653     if ($wrap eq 'hard') {
6654     $self->{onerror}->(node => $item->{node},
6655     type => 'attribute missing',
6656     text => 'cols',
6657     level => $self->{level}->{must});
6658     }
6659     }
6660     }
6661 wakaba 1.66 },
6662 wakaba 1.52 };
6663 wakaba 1.49
## Checker definition for the |output| element (phrasing content).
$Element->{$HTML_NS}{output} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    ## NOTE: |for| is an "unordered set of unique space-separated tokens".
    ## The first occurrence of each token is queued in |$self->{idref}|
    ## (with type |any|); any later occurrence is reported as a duplicate.
    'for' => sub {
      my ($self, $attr) = @_;

      my %seen;
      TOKEN: foreach my $token (split /[\x09\x0A\x0C\x0D\x20]+/,
                                $attr->value) {
        ## A leading separator makes |split| yield an empty first field.
        next TOKEN unless length $token;

        if ($seen{$token}++) {
          $self->{onerror}->(node => $attr, type => 'duplicate token',
                             value => $token,
                             level => $self->{level}->{must});
        } else {
          push @{$self->{idref}}, ['any', $token, $attr];
        }
      }
    },
    'form' => $HTMLFormAttrChecker,
    'name' => $FormControlNameAttrChecker,
    'onformchange' => $HTMLEventHandlerAttrChecker, ## TODO: tests
    'onforminput' => $HTMLEventHandlerAttrChecker, ## TODO: tests
  }, {
    %HTMLAttrStatus,
    'for' => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    'form' => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    'name' => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
    'onchange' => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    'onformchange' => FEATURE_WF2,
    'onforminput' => FEATURE_WF2,
  }),
};
6700    
## Checker definition for the |isindex| element (empty content model,
## deprecated by HTML4).
$Element->{$HTML_NS}{isindex} = {
  %HTMLEmptyChecker,
  status => (FEATURE_M12N10_REC_DEPRECATED |
             Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD), ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->({
    ## NOTE: Text [M12N]; any value is accepted.
    'prompt' => sub {},
  }, {
    %HTMLAttrStatus,
    'class' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    'dir' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    'id' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    'lang' => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    'prompt' => FEATURE_M12N10_REC_DEPRECATED,
    'sdapref' => FEATURE_HTML20_RFC,
    'style' => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    'title' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Classify the URI-valued attributes of this element.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{action}->{type}->{action} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
6728 wakaba 1.49
## Checker definition for the |script| element.
##
## With |src| the element must be empty.  Without |src| the text content
## is accumulated and, at the end of the element, handed to the
## |onsubdoc| callback together with the computed script type.
$Element->{$HTML_NS}{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    charset => sub {
      my ($self, $attr) = @_;

      ## |charset| is reported when the element has no |src|.
      if (not $attr->owner_element->has_attribute_ns (undef, 'src')) {
        $self->{onerror}->(type => 'attribute not allowed',
                           node => $attr,
                           level => $self->{level}->{must});
      }

      $HTMLCharsetChecker->($attr->value, @_);
    },
    language => sub {}, ## NOTE: No syntax constraint according to HTML4.
    src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
  }, {
    %HTMLAttrStatus,
    async => FEATURE_HTML5_WD,
    charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    href => FEATURE_RDFA_REC,
    id => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};

    if ($el->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $el->get_attribute_ns (undef, 'type');
      my $language = $el->get_attribute_ns (undef, 'language');

      ## An empty |type| or |language|, or the absence of both, selects
      ## the default type.  Otherwise |type| wins over |language|.
      my $type_is_empty = (defined $type and $type eq '');
      my $language_is_empty = (defined $language and $language eq '');
      if ($type_is_empty or $language_is_empty or
          not (defined $type or defined $language)) {
        $type = 'text/javascript';
      } elsif (not defined $type) {
        $type = 'text/' . $language;
      }

      ## Reduce a well-formed media type to lowercase |type/subtype|.
      if ($type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
        ($type = "$1/$2") =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
        ## TODO: Though we strip parameters here, they should not be
        ## ignored for the purpose of conformance checking...
      }
      $element_state->{script_type} = $type;
    }

    for my $attr_name (qw(src template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }

    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }

    ## Explicitly allowed elements are accepted as is.
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    if ($element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:empty',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    if ($element_state->{must_be_empty} and $has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed:empty',
                         level => $self->{level}->{must});
    }

    ## Collect the script text for the check at the end of the element.
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    return if $element_state->{must_be_empty};

    my $script_type = $element_state->{script_type};
    if ($script_type =~ m![+/][Xx][Mm][Ll]\z!) {
      ## NOTE: XML content should be checked by THIS instance of checker
      ## as part of normal tree validation.
      $self->{onerror}->(node => $item->{node},
                         type => 'XML script lang',
                         text => $element_state->{script_type},
                         level => $self->{level}->{uncertain});
      ## ISSUE: Should we raise some kind of error for
      ## <script type="text/xml">aaaaa</script>?
      ## NOTE: ^^^ This is why we throw an "uncertain" error.
    } else {
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => $element_state->{script_type},
                           is_char_string => 1});
    }

    $HTMLChecker{check_end}->(@_);
  },
  ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
  ## NOTE: "When used to include script data, the script data must be embedded
  ## inline, the format of the data must be given using the type attribute,
  ## and the src attribute must not be specified." - not testable.
  ## TODO: It would be possible to err <script type=text/plain src=...>
};
6853 wakaba 1.25 ## ISSUE: Significant check and text child node
6854 wakaba 1.1
6855     ## NOTE: When script is disabled.
6856     $Element->{$HTML_NS}->{noscript} = {
## Checker definition for the |noscript| element.  Its content model
## depends on whether the |in_head| flag is set.
$Element->{$HTML_NS}{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |noscript| is reported when the document is not an HTML document.
    if (not $item->{node}->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript',
                         level => $self->{level}->{must});
    }

    ## Outside |head|, a nested |noscript| is excluded.
    if (not $self->{flag}->{in_head}) {
      $self->_add_minus_elements ($element_state,
                                  {$HTML_NS => {noscript => 1}});
    }

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Outside |head| the element is simply transparent.
    return $HTMLTransparentChecker{check_child_element}->(@_)
        unless $self->{flag}->{in_head};

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## In |head|, only |link|, |style| without |scoped|, and |meta| with
    ## an |http-equiv| other than |Content-Type| are accepted.
    my $is_allowed = 0;
    if ($child_nsuri eq $HTML_NS) {
      if ($child_ln eq 'link') {
        $is_allowed = 1;
      } elsif ($child_ln eq 'style') {
        $is_allowed = 1 unless $child_el->has_attribute_ns (undef, 'scoped');
      } elsif ($child_ln eq 'meta') {
        my $http_equiv_attr
            = $child_el->get_attribute_node_ns (undef, 'http-equiv');
        ## TODO: case
        $is_allowed = 1
            if $http_equiv_attr and
               lc ($http_equiv_attr->value) ne 'content-type';
      }
    }

    unless ($is_allowed) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:head noscript',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    return $HTMLTransparentChecker{check_child_text}->(@_)
        unless $self->{flag}->{in_head};

    ## In |head|, significant text is not allowed.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $finish = $self->{flag}->{in_head}
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $finish->(@_);
  },
};
6947 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
6948 wakaba 1.1
## Checker definition for the |event-source| element (hyphenated
## spelling; empty content model).
$Element->{$HTML_NS}{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->(
    {src => $HTMLURIAttrChecker},
    {%HTMLAttrStatus, src => FEATURE_HTML5_LC_DROPPED},
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## All three URI-valued attributes are classified as |resource|.
    for my $attr_name (qw(src template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
6966    
## Checker definition for the |eventsource| element (empty content
## model).
$Element->{$HTML_NS}{eventsource} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {src => $HTMLURIAttrChecker},
    {%HTMLAttrStatus, src => FEATURE_HTML5_WD},
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## All three URI-valued attributes are classified as |resource|.
    for my $attr_name (qw(src template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
6984    
## Checker definition for the |details| element.  It starts from a copy
## of the |fieldset| definition and overrides |status| and |check_attrs|.
$Element->{$HTML_NS}{details} = {
  %{$Element->{$HTML_NS}{fieldset}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {open => $GetHTMLBooleanAttrChecker->('open')},
    {%HTMLAttrStatus, open => FEATURE_HTML5_LC},
  ),
};
6995    
## Checker definition for the |datagrid| element.
##
## |$element_state->{phase}| tracks the content model:
##   |any|  - nothing decisive seen yet;
##   |flow| - flow content has started;
##   |none| - a |table|, |select|, or |datalist| was the first child, so
##            nothing more is allowed.
$Element->{$HTML_NS}{datagrid} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {a => 1, datagrid => 1}});
    $element_state->{phase} = 'any';
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  ## NOTE: Flow -(text* (table|select|datalist) Flow*) | table | select |
  ## datalist | Empty
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## True for the HTML elements |table|, |select|, and |datalist|.
    my $is_data_source = ($child_nsuri eq $HTML_NS and
                          {table => 1, select => 1, datalist => 1}
                              ->{$child_ln});
    my $phase = $element_state->{phase};

    if ($phase eq 'flow') {
      ## Non-flow children are rejected, and so is a data source element
      ## that would be the first element child.
      my $is_rejected = 1;
      if ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
        $is_rejected = (not $element_state->{has_element} and
                        $is_data_source);
      }
      if ($is_rejected) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed', ## TODO: :flow
                           level => $self->{level}->{must});
      }
      $element_state->{has_element} = 1;
    } elsif ($phase eq 'any') {
      if ($is_data_source) {
        $element_state->{phase} = 'none';
      } elsif ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{has_element} = 1;
        $element_state->{phase} = 'flow';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($phase eq 'none') {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed',
                         level => $self->{level}->{must});
    } else {
      die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    my $phase = $element_state->{phase};
    return if $phase eq 'flow';

    if ($phase eq 'any') {
      ## Significant text starts the flow content.
      $element_state->{phase} = 'flow';
    } else {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    unless ($element_state->{phase} eq 'flow') {
      ## NOTE: Since the content model explicitly allows a |datagrid| element
      ## being empty, we don't raise "no significant content" error for this
      ## element when there is no element.  (We should raise an error for
      ## |<datagrid><br></datagrid>|, however.)
      ## NOTE: As a side-effect, when the |datagrid| element only contains
      ## non-conforming content, then the |phase| flag has not changed from
      ## |any|, no "no significant content" error is raised either.
      ## NOTE: Another side-effect of the current implementation:
      ## |<datagrid><datagrid/></datagrid>| has no "no significant content"
      ## error at all.
      return $HTMLChecker{check_end}->(@_);
    }

    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'no significant content',
                         level => $self->{level}->{should});
    }
  },
};
7110    
## Checker definition for the |command| element (empty content model).
$Element->{$HTML_NS}{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    type => sub {
      my ($self, $attr) = @_;

      ## The value is compared as is (case-sensitively) against the
      ## three known keywords.
      my %is_known_type = map { $_ => 1 } qw(command checkbox radio);
      return if $is_known_type{$attr->value};

      $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                         level => $self->{level}->{must});
    },
  }, {
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_WD,
    disabled => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |icon| is classified as |embedded|; the other two URI-valued
    ## attributes are classified as |resource|.
    my $uri_info = ($element_state->{uri_info} ||= {});
    $uri_info->{icon}->{type}->{embedded} = 1;
    $uri_info->{template}->{type}->{resource} = 1;
    $uri_info->{ref}->{type}->{resource} = 1;
  },
};
7147    
## Checker definition for the |bb| element.  Interactive content is
## excluded from its descendants while the element is open.
$Element->{$HTML_NS}{bb} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1})},
    {%HTMLAttrStatus, type => FEATURE_HTML5_WD},
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusion installed by |check_start|.
    $self->_remove_minus_elements ($element_state);

    ## NOTE(review): The end check is delegated to the transparent
    ## checker although the base above is the phrasing content checker --
    ## confirm that this is intended.
    $HTMLTransparentChecker{check_end}->(@_);
  },
};
7171    
## Checker definition for the |menu| element.
##
## The content is either a sequence of |li| elements or phrasing content,
## never a mix.  |$element_state->{phase}| records which form is in use:
##   |li or phrasing| - undecided (initial state);
##   |li|             - an |li| child has been seen;
##   |phrasing|       - a phrasing element or significant text has been
##                      seen.
## While the element is open, |$self->{flag}->{in_menu}| is set.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## ISSUE: <menu id=""><p contextmenu=""> match?  (In the current
    ## implementation, it does not match.)
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_DROPPED,
    ## NOTE: This key must be |compact|, the attribute that has a checker
    ## above; it was previously misspelled |compat|, so the status never
    ## applied to the real attribute.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';

    ## Remember the outer value of the flag so that |check_end| can tell
    ## whether this element is nested in another |menu|.
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    $element_state->{id_type} = 'menu';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        ## An |li| after phrasing content has started.
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        ## A phrasing element after an |li| has been seen.
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        ## Significant text after an |li| has been seen.
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Clear the flag only when this is the outermost |menu|.
    delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};

    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  },
};
7264    
## Checker definition for the |datatemplate| element.  Apart from
## explicitly allowed ("plus") elements, only HTML |rule| children are
## accepted, and significant text is not allowed.
$Element->{$HTML_NS}{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }

    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};
    return if $child_nsuri eq $HTML_NS and $child_ln eq 'rule';

    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:datatemplate',
                       level => $self->{level}->{must});
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must});
  },
  is_xml_root => 1,
};
7295    
## Checker definition for the |rule| element.  Children are not checked
## here (both child callbacks are no-ops); |nest| is added to the allowed
## ("plus") elements and the |in_rule| flag is set while the element is
## open.
$Element->{$HTML_NS}{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    condition => $HTMLSelectorsAttrChecker,
    mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
  }, {
    %HTMLAttrStatus,
    condition => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});

    ## Remember the outer value so that |check_end| only clears the flag
    ## for the outermost |rule|.
    $element_state->{in_rule_original} = $self->{flag}->{in_rule};
    $self->{flag}->{in_rule} = 1;

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->_remove_plus_elements ($element_state);
    if (not $element_state->{in_rule_original}) {
      delete $self->{flag}->{in_rule};
    }

    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
7331    
## Checker definition for the HTML |nest| element, built on top of the
## |%HTMLEmptyChecker| callbacks.
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Element-specific attributes and their value checkers.
      filter => $HTMLSelectorsAttrChecker,
      mode => sub {
        my ($self, $attr) = @_;

        ## The value must be a non-empty string that contains none of
        ## U+0009, U+000A, U+000C, U+000D, or U+0020; anything else
        ## (including the empty string) is reported as a syntax error.
        if ($attr->value !~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/) {
          $self->{onerror}->(node => $attr,
                             type => 'mode:syntax error',
                             level => $self->{level}->{must});
        }
      },
    },
    {
      ## Feature status of each attribute; the explicit entries override
      ## whatever |%HTMLAttrStatus| provides for the same names.
      %HTMLAttrStatus,
      filter => FEATURE_HTML5_AT_RISK,
      mode => FEATURE_HTML5_AT_RISK,
    },
  ),
};
7351    
## Checker definition for the HTML |legend| element, built on top of the
## |%HTMLPhrasingContentChecker| callbacks.
$Element->{$HTML_NS}->{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## Element-specific attributes and their value checkers.
      accesskey => $HTMLAccesskeyAttrChecker,
      ## The |align| checker is disabled (kept for reference):
      # align => $GetHTMLEnumeratedAttrChecker->({
      #   top => 1, bottom => 1, left => 1, right => 1,
      # }),
      form => $HTMLFormAttrChecker,
    },
    {
      ## Feature status of each attribute.  The two shared tables come
      ## first so that the explicit entries below take precedence.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC_DEPRECATED,
      form => FEATURE_HTML5_DROPPED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
7370    
## Checker definition for the HTML |div| element, built on top of the
## |%HTMLFlowContentChecker| callbacks.
$Element->{$HTML_NS}->{div} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## |align| is checked as an enumerated attribute with these keywords.
      align => $GetHTMLEnumeratedAttrChecker->({
        left => 1, center => 1, right => 1, justify => 1,
      }),
    },
    {
      ## Feature status of each attribute.  The two shared tables come
      ## first so that the explicit entries below take precedence.
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, undef, $state) = @_;

    ## Flag the |datasrc|, |template|, and |ref| attributes as
    ## "resource" URIs.
    for my $attr_name (qw(datasrc template ref)) {
      $state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  },
};
7395    
## Checker definition for the HTML |center| element, built on top of the
## |%HTMLFlowContentChecker| callbacks.  It has no element-specific
## attribute checkers, only attribute status information.
$Element->{$HTML_NS}->{center} = {
  %HTMLFlowContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      ## The shared tables come first so that the explicit |lang| entry
      ## takes precedence.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
    },
  ),
};
7405    
## Checker definition for the HTML |font| element, built on top of the
## |%HTMLTransparentChecker| callbacks.
$Element->{$HTML_NS}->{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |size|, |color|, |face|
    },
    {
      ## |%HTMLAttrStatus| comes first so that the explicit entries below
      ## take precedence.
      %HTMLAttrStatus,

      ## Deprecated element-specific attributes.
      color => FEATURE_M12N10_REC_DEPRECATED,
      face => FEATURE_M12N10_REC_DEPRECATED,
      size => FEATURE_M12N10_REC_DEPRECATED,

      ## Common attributes.
      class => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      style => FEATURE_HTML5_WD | FEATURE_XHTML10_REC,
      title => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    },
  ),
  ## NOTE: When the |font| element was defined in the HTML5 specification,
  ## it was allowed only in a document with the WYSIWYG signature.  The
  ## checker does not check whether the signature is present, since the
  ## signature has been dropped, too, and has never been implemented.  (In
  ## addition, an "element not defined" error is raised for any |font|
  ## element anyway, so we don't have to raise an additional error.)
};
7430 wakaba 1.49
## Checker definition for the HTML |basefont| element, built on top of the
## |%HTMLEmptyChecker| callbacks.
$Element->{$HTML_NS}->{basefont} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: color, face, size
    },
    {
      ## |%HTMLAttrStatus| comes first so that the explicit entries below
      ## take precedence.
      %HTMLAttrStatus,
      color => FEATURE_M12N10_REC_DEPRECATED,
      face => FEATURE_M12N10_REC_DEPRECATED,
      size => FEATURE_M12N10_REC_DEPRECATED,
      ## Alternative status for |id|, currently disabled:
      #id => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    },
  ),
};
7445    
7446 wakaba 1.49 ## TODO: frameset FEATURE_M12N10_REC
7447     ## class title id cols rows onload onunload style(x10)
7448     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7449     ## noframes Common, lang(xhtml10)
7450    
7451 wakaba 1.100 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7452 wakaba 1.56
7453 wakaba 1.61 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7454     ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7455     ## xmp, listing sdapref [HTML 2.0]
7456    
7457 wakaba 1.56 =pod
7458    
7459 wakaba 1.61 HTML 2.0 nextid @n
7460    
7461     RFC 2659: CERTS CRYPTOPTS
7462    
7463     ISO-HTML: pre-html, divN
7464 wakaba 1.82
7465     XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7466     di (Common), nl (Common), handler (Common, type), standby (Common),
7467     summary (Common)
7468    
7469 wakaba 1.97 Access & XHTML2: access (LC)
7470 wakaba 1.82
7471     XML Events & XForms (for XHTML2 support; very, very low priority)
7472 wakaba 1.61
7473 wakaba 1.56 =cut
7474 wakaba 1.61
7475     ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7476     ## We added them only to |a|. |link| and |form| might also allow them
7477     ## in theory.
7478 wakaba 1.1
## Record that the checker definitions for the HTML namespace are loaded.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

## A module file must end with a true value.
1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24