/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.218 - (show annotations) (download)
Sat Sep 5 09:26:54 2009 UTC (15 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.217: +2 -0 lines
++ whatpm/t/ChangeLog	5 Sep 2009 09:26:39 -0000
2009-09-05  Wakaba  <wakaba@suika.fam.cx>

	* tokenizer-test-1.test: Added test cases for "comment end bang
	state" (HTML5 revision 3191).

++ whatpm/Whatpm/HTML/ChangeLog	5 Sep 2009 09:26:12 -0000
2009-09-05  Wakaba  <wakaba@suika.fam.cx>

	* Tokenizer.pm.src (_get_next_token): Implemented the "comment end
	bang state" (HTML5 revision 3191).

1 package Whatpm::ContentChecker;
2 use strict;
3 require Whatpm::ContentChecker;
4
5 use Char::Class::XML qw/InXML_NCNameStartChar10 InXMLNCNameChar10/;
6
7 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
8
9 ## --- Feature Status ---
10
## HTML5 feature in the "implemented" status.
##
## Strictly speaking, HTML5's "implemented and widely deployed" status
## does not by itself satisfy the condition for FEATURE_STATUS_REC,
## because most features marked as "implemented" have no test cases.
## This status is nevertheless special-cased as if it had passed the
## CR phase, considering HTML's history.
sub FEATURE_HTML5_REC () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}
23
## HTML5 feature in the "awaiting implementation feedback" status.
sub FEATURE_HTML5_CR () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
## HTML5 feature in the "last call for comments" status.
sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_LC
}
## HTML5 feature in the "being considered for removal" status.  It is
## still part of HTML5, so it carries the working-draft status bit
## together with FEATURE_ALLOWED.
sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
## HTML5 feature in the "working draft" status.
sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
## HTML5 feature in the "first draft" status; reported with the
## working-draft status bit.
sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
## HTML5 feature that has no status annotation; reported with the
## working-draft status bit.
sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_ALLOWED |
  Whatpm::ContentChecker::FEATURE_STATUS_WD
}
## Feature that was part of HTML5, in a status earlier than the last
## call for comments, and was then dropped.  Only the status bit is
## set; FEATURE_ALLOWED is deliberately absent.
sub FEATURE_HTML5_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_WD }
## Feature that was part of HTML5 in the last call for comments
## status and was then dropped.  FEATURE_ALLOWED is not set.
sub FEATURE_HTML5_LC_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
65
## Feature defined in WF2 (whether deprecated or not) and later
## incorporated into the HTML5 spec.
sub FEATURE_WF2X () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
## Feature introduced or modified in WF2 that was not merged into
## HTML5.
sub FEATURE_WF2 () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
## Feature mentioned only in WF2's informative appendix A that was
## not merged into HTML5.
sub FEATURE_WF2_INFORMATIVE () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
81
## Feature defined by RDFa at the Recommendation status.
sub FEATURE_RDFA_REC () { Whatpm::ContentChecker::FEATURE_STATUS_REC }
## Feature that was defined in an RDFa last call working draft and
## was then dropped.
sub FEATURE_RDFA_LC_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
90
91 ## NOTE: XHTML Role LCWD has almost no information on how the |role|
92 ## attribute can be used - the only requirement on that matter is:
93 ## "the attribute MUST be referenced using its namespace-qualified form" (and
94 ## this is a host language conformance requirement!).
## Feature defined by the XHTML Role last call working draft.
sub FEATURE_ROLE_LC () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
98
## Feature from the XHTML 2.0 Editor's Draft, in which the namespace
## URI is "http://www.w3.org/1999/xhtml".
sub FEATURE_XHTML2_ED () { Whatpm::ContentChecker::FEATURE_STATUS_WD }
104
## New feature of the XHTML Basic 1.1 Recommendation (i.e. one that
## is not in XHTML M12N).
sub FEATURE_XHTMLBASIC11_CR () { Whatpm::ContentChecker::FEATURE_STATUS_REC }
## New but deprecated feature of the XHTML Basic 1.1 Recommendation.
sub FEATURE_XHTMLBASIC11_CR_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO |
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}
116
## Feature defined by the Ruby Annotation specification.
## NOTE(review): The name says REC but the value is the CR status
## bit - confirm that this is intentional.
sub FEATURE_RUBY_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
120
## New feature of the XHTML M12N 1.1 Recommendation (i.e. one that is
## not in 1.0).
sub FEATURE_M12N11_LC () { Whatpm::ContentChecker::FEATURE_STATUS_REC }
125
126 ## NOTE: M12N10 status is based on its abstract module definition.
127 ## It contains a number of problems. (However, again, it's a REC!)
## Feature of XHTML M12N 1.0.  XHTML M12N 1.0 did pass the CR phase,
## so the Recommendation status bit is used in spite of the problems
## in its abstract module definition.
sub FEATURE_M12N10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_REC }
## Deprecated feature of XHTML M12N 1.0.
sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_INFO |
  Whatpm::ContentChecker::FEATURE_STATUS_REC
}
136
137 ## NOTE: XHTML10 status is based on its transitional and frameset DTDs
138 ## (second edition). Only missing attributes from M12N10 abstract
139 ## definition are added.
## Feature taken from the XHTML 1.0 DTDs.
## NOTE(review): The name says REC but the value is the CR status
## bit - confirm that this is intentional.
sub FEATURE_XHTML10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
143
144 ## NOTE: Diff from HTML4.
## Feature from the ISO-HTML preparation (informative documentation).
sub FEATURE_ISOHTML_PREPARATION () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
148
149 ## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
150 ## 4.01). Only missing attributes from XHTML10 are added.
## Feature that HTML4 only reserves; reported with the working-draft
## status bit.
sub FEATURE_HTML4_REC_RESERVED () { Whatpm::ContentChecker::FEATURE_STATUS_WD }
154
155 ## TODO: According to HTML4 definition, authors SHOULD use style sheets
156 ## rather than presentational attributes (deprecated or not deprecated).
157
158 ## NOTE: Diff from HTML4.
## Obsolete HTML 3.2 feature.  The deprecation is a "should"-level
## one (a lowercase normative "should").
sub FEATURE_HTML32_REC_OBSOLETE () {
  Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD |
  Whatpm::ContentChecker::FEATURE_STATUS_CR
}
164
## Feature defined by RFC 2659 (an Experimental RFC).
sub FEATURE_RFC2659 () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
168
169 ## NOTE: HTML 2.x - diff from HTML 2.0 and not in newer versions.
## Feature defined by an HTML 2.x RFC (Proposed Standard, obsolete).
sub FEATURE_HTML2X_RFC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
173
174 ## NOTE: Diff from HTML 2.0.
## Feature defined by RFC 1942 (an Experimental RFC, obsolete).
sub FEATURE_RFC1942 () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
178
179 ## NOTE: Diff from HTML 3.2.
## Feature defined by the HTML 2.0 RFC (Proposed Standard, obsolete).
sub FEATURE_HTML20_RFC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
183
184 ## --- Content Model ---
185
186 ## December 2007 HTML5 Classification
187
## Elements categorized as metadata content, keyed by namespace URI
## and then by local name.
my $HTMLMetadataContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/
      title base link style script noscript
      event-source eventsource
      command datatemplate
    /),
    ## NOTE: A |meta| with no |name| element is not allowed as
    ## a metadata content other than |head| element.
    meta => 1,
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
201
## Elements categorized as flow content, keyed by namespace URI and
## then by local name.  Non-inter-element-whitespace text nodes are
## flow content too, but they cannot be listed in this table.
my $HTMLFlowContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/
      section nav article blockquote aside
      h1 h2 h3 h4 h5 h6 hgroup
      header footer address p hr dialog pre
      ol ul dl menu figure table
      form fieldset
    /),
    details => 1, ## ISSUE: "Flow element" in spec.
    datagrid => 1, ## ISSUE: "Flow element" in spec.
    datatemplate => 1,
    div => 1, ## ISSUE: No category in spec.
    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    style => 1,

    ## Phrasing content, all of which is also flow content.
    (map { ($_ => 1) } qw/
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo ruby
      script noscript event-source eventsource
      command bb
      input button label select datalist
      textarea keygen output
      datagrid
    /),
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## Flow/phrasing content whose content model is transparent.
    (map { ($_ => 1) } qw/a ins del font map/),

    ## Embedded content, which is also flow content.
    (map { ($_ => 1) } qw/img iframe embed object video audio canvas/),
  },

  ## Embedded content in other namespaces is flow content as well.
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
}; # $HTMLFlowContent
246
## Elements categorized as sectioning content.
my $HTMLSectioningContent = {
  $HTML_NS => {map { ($_ => 1) } qw/section nav article aside/},
}; # $HTMLSectioningContent
252
## Elements that are sectioning roots.
my $HTMLSectioningRoot = {
  $HTML_NS => {map { ($_ => 1) } qw/blockquote datagrid figure td/},
};
258
## Elements categorized as heading content.
my $HTMLHeadingContent = {
  $HTML_NS => {map { ($_ => 1) } qw/h1 h2 h3 h4 h5 h6 hgroup/},
};
264
## Elements categorized as phrasing content, keyed by namespace URI
## and then by local name.  All phrasing content is also flow
## content.  Non-inter-element-whitespace text nodes are phrasing
## content too, but they cannot be listed in this table.
my $HTMLPhrasingContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo ruby
      script noscript event-source eventsource
      command bb
      input button label select datalist
      textarea keygen output
      datagrid
    /),
    ## NOTE: |area| is allowed only as a descendant of |map|.
    area => 1,

    ## NOTE: Transparent.
    (map { ($_ => 1) } qw/a ins del font map/),

    ## Embedded content, which is also phrasing content.
    (map { ($_ => 1) } qw/img iframe embed object video audio canvas/),
  },

  ## Embedded content in other namespaces is phrasing content as well.
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
}; # $HTMLPhrasingContent
294
295 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
296
## Elements that are, or under some condition can be, interactive
## content.  The conditions noted below are not encoded in the table.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/
      a
      label button select textarea
      keygen details
    /),
    datagrid => 1, bb => 1, ## dropped
    iframe => 1, embed => 1,

    ## NOTE: When the |usemap| attribute is specified.
    img => 1, object => 1,

    ## NOTE: When "type=hidden" attribute is not specified.
    input => 1,

    ## NOTE: When "controls" attribute is specified.
    video => 1, audio => 1,

    ## NOTE: When "type=toolbar" attribute is specified.
    menu => 1,
  },
}; # $HTMLInteractiveContent
318
319 ## NOTE: Labelable form-associated element.
my $LabelableFAE = {
  $HTML_NS => {map { ($_ => 1) } qw/input button select textarea keygen/},
};
325
326 ## Check whether the labelable form-associated element is allowed to
327 ## place there or not and mark the element ID, if any, might be used
328 ## in the |for| attribute of a |label| element.
my $FAECheckStart = sub {
  ## Only the element state (third argument) is used: the element is
  ## tagged as "labelable".
  my $element_state = $_[2];
  $element_state->{id_type} = 'labelable';
}; # $FAECheckStart
my $FAECheckAttrs2 = sub {
  my ($self, $item, $element_state) = @_;

  ## This runs in the "check_attrs2" phase because it needs the |id|
  ## attribute of the element, if any, to be reflected to the
  ## |$self->{id}| hash already.

  my $flag = $self->{flag} ||= {};

  ## Not (yet) a labelable element within a |label|: just remember
  ## that a labelable element has been seen.
  unless ($flag->{has_label} and $flag->{has_labelable}) {
    $flag->{has_labelable} = 2;
    return;
  }

  ## Even if there is an ancestor |label| element with its |for|
  ## attribute specified, there is no problem as long as the
  ## attribute value identifies THIS element.
  my $for = $flag->{label_for};
  if (defined $for) {
    my $id_attrs = $self->{id}->{$for};
    my $target = ($id_attrs and $id_attrs->[0])
        ? $id_attrs->[0]->owner_element : undef;
    return if $target and $target eq $item->{node};
  }

  $self->{onerror}->(node => $item->{node},
                     type => 'multiple labelable fae',
                     level => $self->{level}->{must});
}; # $FAECheckAttrs2
365
366 our $IsInHTMLInteractiveContent; # See Whatpm::ContentChecker.
367
368 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
369 ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
370
371 ## -- Common attribute syntax checkers
372
373 our $AttrChecker;
374 our $AttrStatus;
375
## Returns a checker for an enumerated attribute.
##
## |$states| maps each (lowercase) keyword to a positive number if
## the keyword is conforming, or to a negative number if it is known
## but non-conforming.  Keywords with no entry are invalid.
##
## The returned checker takes |($self, $attr)| and reports, at the
## "must" level, 'enumerated:non-conforming' or 'enumerated:invalid'
## through |$self->{onerror}|.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    my $value = lc $attr->value; ## TODO: ASCII case insensitibility?

    ## An unknown keyword has no entry in the table.  Normalize it to
    ## 0 so that the numeric comparison below never sees |undef|
    ## (which would emit an "uninitialized" warning under -w).
    my $state = $states->{$value} || 0;

    if ($state > 0) {
      ## Conforming keyword.
    } elsif ($state) {
      $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming',
                         level => $self->{level}->{must});
    } else {
      $self->{onerror}->(node => $attr, type => 'enumerated:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLEnumeratedAttrChecker
392
## Returns a checker for the boolean attribute named by the argument.
## The only accepted values are the empty string and the attribute's
## own name (compared after |lc|); anything else is reported as
## 'boolean:invalid' at the "must" level.
my $GetHTMLBooleanAttrChecker = sub {
  my ($attr_name) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $given = lc $attr->value; ## TODO: case
    if ($given ne '' and $given ne $attr_name) {
      $self->{onerror}->(node => $attr, type => 'boolean:invalid',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLBooleanAttrChecker
404
405 ## Unordered set of space-separated tokens
## Returns a checker for an attribute whose value is a set of unique
## tokens.  |$allowed_words|, if defined, is a hash reference whose
## true-valued keys are the only permitted tokens; if it is
## undefined, any token is permitted.  A repeated token is reported
## as 'duplicate token' and a disallowed one as 'word not allowed'.
my $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($allowed_words) = @_;
  return sub {
    my ($self, $attr) = @_;
    my %seen;
    for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      ## Leading white space produces an empty first field.
      next unless length $token;

      if ($seen{$token}++) {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $token,
                           level => $self->{level}->{must});
      } elsif (defined $allowed_words and not $allowed_words->{$token}) {
        $self->{onerror}->(node => $attr, type => 'word not allowed',
                           value => $token,
                           level => $self->{level}->{must});
      }
    }
  };
}; # $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
431
432 ## |rel| attribute (set of space separated tokens,
433 ## whose allowed values are defined by the section on link types)
## Arguments: |$a_or_area| selects which entry of a link type's
## |effect| array applies (and is also used as a boolean below);
## |$todo| and |$element_state| receive the results; |$self| and
## |$attr| are the checker and the attribute node as usual.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr, $item, $element_state) = @_;

  ## Collect the distinct link types.
  my %word;
  for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
    next unless length $token;
    (my $word = $token) =~ tr/A-Z/a-z/; ## ASCII case-insensitive.

    if (not $word{$word}) {
      $word{$word} = 1;
    } elsif ($word ne 'up') {
      ## NOTE: Repeating |up| is not an error (see below).
      $self->{onerror}->(node => $attr, type => 'duplicate token',
                         value => $word,
                         level => $self->{level}->{must});
    }
  }

  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.

  my $is_hyperlink;
  my $is_resource;
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $word (keys %word) {
    my $def = $LinkType->{$word};
    unless (defined $def) {
      $self->{onerror}->(node => $attr,
                         type => 'unknown link type',
                         value => $word,
                         level => $self->{level}->{uncertain});
      next;
    }

    my $status = $def->{status};
    my $effect = $def->{effect}->[$a_or_area];

    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr,
                           type => 'link type:proposed',
                           value => $word,
                           level => $self->{level}->{should});
      }
      ## The link type has no effect for this kind of element.
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context',
                           value => $word,
                           level => $self->{level}->{must});
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming',
                         value => $word,
                         level => $self->{level}->{must});
    }

    ## A link type marked |unique| is reported from its second use
    ## on (as recorded in |$self->{has_link_type}|).
    if ($def->{unique}) {
      if ($self->{has_link_type}->{$word}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate',
                           value => $word,
                           level => $self->{level}->{must});
      } else {
        $self->{has_link_type}->{$word} = 1;
      }
    }

    ## |alternate| is handled after the loop, since its effect
    ## depends on whether |stylesheet| is also specified.
    if (defined $effect and $word ne 'alternate') {
      $is_hyperlink = 1 if $effect eq 'hyperlink';
      $is_resource = 1 if $effect eq 'external resource';
    }
  }
  $is_hyperlink = 1 if $word{alternate} and not $word{stylesheet};
  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback link,
  ## which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".

  $todo->{has_hyperlink_link_type} = 1 if $is_hyperlink;
  if ($is_hyperlink or $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }
  if ($is_resource and not $a_or_area) {
    $element_state->{uri_info}->{href}->{type}->{resource} = 1;
  }

  $element_state->{link_rel} = \%word;
}; # $HTMLLinkTypesAttrChecker
546
547 ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
548
549 ## URI (or IRI)
## Checks an attribute whose value is a single URI (or IRI)
## reference.
##
## The value is validated by |Whatpm::URIChecker|; any error it
## reports is forwarded to |$self->{onerror}| with the attribute
## added as |node|.  The attribute is also recorded in
## |$element_state->{uri_info}| (under the attribute name) and that
## same record is appended to |$self->{return}->{uri}->{$value}|.
##
## NOTE(review): |Whatpm::URIChecker| is not loaded here; presumably
## it is loaded elsewhere - confirm.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr, $item, $element_state) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $value = $attr->value;

  ## The level table is the third argument of the call, as in the
  ## checker for space-separated lists of URIs.  It used to sit
  ## outside the closing parenthesis, so it was evaluated and
  ## discarded instead of being passed.
  Whatpm::URIChecker->check_iri_reference ($value, sub {
    $self->{onerror}->(@_, node => $attr);
  }, $self->{level});
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>

  my $attr_name = $attr->name;
  $element_state->{uri_info}->{$attr_name}->{node} = $attr;
  ## TODO: absolute
  push @{$self->{return}->{uri}->{$value} ||= []},
      $element_state->{uri_info}->{$attr_name};
}; # $HTMLURIAttrChecker
565
566 ## A space separated list of one or more URIs (or IRIs)
## Each URI in the list is validated by |Whatpm::URIChecker| (errors
## are forwarded with the URI as |value|, the attribute as |node| and
## the zero-based position as |index|) and is recorded in
## |$self->{return}->{uri}|.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## How the URIs are used, by attribute name.
  my %type_for = (
    ping => 'action',
    profile => 'namespace',
    archive => 'resource',
  );
  my $type = $type_for{$attr->name};

  my @uris = split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;
  my $i = 0;
  for my $value (@uris) {
    my $report = sub {
      $self->{onerror}->(value => $value, @_, node => $attr, index => $i);
    };
    Whatpm::URIChecker->check_iri_reference ($value, $report, $self->{level});

    ## TODO: absolute
    my $record = {node => $attr, type => {$type => 1}};
    push @{$self->{return}->{uri}->{$value} ||= []}, $record;

    $i++;
  }
  ## ISSUE: Relative references? (especially, in profile="")
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## ISSUE: What is "space"?
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
594
## Unanchored pattern for an e-mail address: two dot-separated
## sequences of "atext" characters joined by "@".
my $ValidEmailAddress = do {
  my $atext = qr[[A-Za-z0-9!#\$%&'*+/=?^_`{|}~-]];
  my $dot_atom = qr/$atext+(?>\.$atext+)*/o;
  qr/$dot_atom\@$dot_atom/o;
};
601
602 ## Valid global date and time.
## Returns a checker for a date and/or time attribute.
##
## |$type| selects the |Message::Date| parsing method: the method
## named |parse_$type| is invoked with the attribute value.
##
## The returned checker forwards every parser error, except those of
## type 'date value not supported', to |$self->{onerror}| with the
## attribute added as |node|.  It then stores into
## |$element_state->{date_value}->{<attribute name>}|:
##   - the parser's result, if that is a true value;
##   - otherwise the empty string, if an error has been forwarded;
##   - otherwise |undef|.
##
## The |($)| prototype that used to be here has been removed:
## prototypes have no effect on calls made through a code reference,
## which is the only way this closure can be called.
my $GetDateTimeAttrChecker = sub {
  my $type = shift;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## Remains |undef| unless an error is forwarded.
    my $reported_error;

    require Message::Date;
    my $dp = Message::Date->new;
    $dp->{level} = $self->{level};
    $dp->{onerror} = sub {
      my %opt = @_;
      unless ($opt{type} eq 'date value not supported') {
        $self->{onerror}->(%opt, node => $attr);
        $reported_error = '';
      }
    };

    my $method = 'parse_' . $type;
    my $d = $dp->$method ($attr->value);
    $element_state->{date_value}->{$attr->name} = $d || $reported_error;
  };
}; # $GetDateTimeAttrChecker
626
## Checks that the attribute value is an optional "-" followed by one
## or more ASCII digits; otherwise reports 'integer:syntax error'.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A-?[0-9]+\z/) {
    $self->{onerror}->(node => $attr, type => 'integer:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLIntegerAttrChecker
635
## Returns a checker for a non-negative integer attribute.  The
## argument is a code reference that receives the numeric value and
## returns true if the value is within the permitted range.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my ($in_range) = @_;
  return sub {
    my ($self, $attr) = @_;
    my $text = $attr->value;
    if ($text !~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'nninteger:syntax error',
                         level => $self->{level}->{must});
    } elsif (not $in_range->($text + 0)) {
      $self->{onerror}->(node => $attr, type => 'nninteger:out of range',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
653
654 ## "Valid floating point number".
## Returns a checker for a floating point number attribute.  The
## argument is a code reference that receives the parsed number and
## returns true if it is within the permitted range.  A number that
## passes is stored into
## |$element_state->{number_value}->{<attribute name>}|.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my ($in_range) = @_;
  return sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## In list context the match returns the four captures (unmatched
    ## optional groups as |undef|), or the empty list on failure.
    my @part = $attr->value =~ /
      \A
      (-?) # $1
      ([0-9]+) # $2
      (?>(\.[0-9]+))? # $3
      (?>[Ee] ([+-]?[0-9]+) )? # $4
      \z
    /x;

    if (@part) {
      my ($sign, $int, $frac, $exp) = @part;
      my $num = (defined $frac ? $int . $frac : $int) + 0;
      $num = -$num if $sign;
      ## The exponent can be "-0", but that is no problem.
      $num *= 10 ** ($exp + 0) if $exp;
      if ($in_range->($num)) {
        $element_state->{number_value}->{$attr->name} = $num;
      } else {
        $self->{onerror}->(node => $attr, type => 'float:out of range',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error',
                         level => $self->{level}->{must});
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
684
## Checker for a valid floating point number that is greater than
## zero.
my $PositiveFloatingPointNumberAttrChecker
    = $GetHTMLFloatingPointNumberAttrChecker->(sub {
        my ($number) = @_;
        return $number > 0;
      });
687
## Checker for the |step| attribute: the value is either the keyword
## "any" (ASCII case-insensitive) or a valid floating point number
## greater than zero.
my $StepAttrChecker = sub {
  my ($self, $attr) = @_;
  ## Anything other than the keyword is delegated, with the original
  ## argument list, to the positive floating point number checker.
  unless ($attr->value =~ /\A[Aa][Nn][Yy]\z/) {
    $PositiveFloatingPointNumberAttrChecker->(@_);
  }
}; # $StepAttrChecker
700
701 ## HTML4 %Length;
## Checks that the attribute value is one or more ASCII digits,
## optionally followed by "%"; otherwise reports
## 'length:syntax error'.
##
## NOTE: HTML4 definition is too vague - it does not define the syntax
## of percentage value at all (!).
my $HTMLLengthAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value !~ /\A[0-9]+%?\z/) {
    $self->{onerror}->(node => $attr, type => 'length:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLLengthAttrChecker
713
## A MIME token: one or more printable ASCII characters other than
## the space and ()<>@,;:\"/[]?= characters.
my $MIMEToken
    = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;

## A type name or a subtype name (RFC 4288).
my $TypeOrSubtype = qr/[A-Za-z0-9!#\$&.+^_-]{1,127}/;

## An Internet media type with no parameter.  The type and the
## subtype are captured, in this order.
my $IMTNoParameter = qr[($TypeOrSubtype)/($TypeOrSubtype)];
717
718 ## "A valid MIME type, optionally with parameters. [RFC 2046]"
719 ## ISSUE: RFC 2046 does not define syntax of media types.
720 ## ISSUE: The definition of "a valid MIME type" is unknown.
721 ## Syntactical correctness?
## The value is split into the type, the subtype and the parameter
## name/value pairs (quoted parameter values are unescaped and
## unquoted), which are then handed to |Whatpm::IMTChecker|.  A value
## that does not match the syntax is reported as 'IMT:syntax error'.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
  ## ISSUE: RFC 2231 extension? Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;

  ## On success the type is always a non-empty string, so whether it
  ## is defined tells whether the value has matched.
  my ($type, $subtype, $param) = $attr->value =~ m#\A$lws0($MIMEToken)$lws0/$lws0($MIMEToken)$lws0((?>;$lws0$MIMEToken$lws0=$lws0(?>$MIMEToken|$qs)$lws0)*)\z#;

  if (defined $type) {
    my @type = ($type, $subtype);

    ## Consume the parameters one by one from the start.
    while ($param =~ s/^;$lws0($MIMEToken)$lws0=$lws0(?>($MIMEToken)|($qs))$lws0//) {
      my ($name, $token, $quoted) = ($1, $2, $3);
      if (defined $token) {
        push @type, $name => $token;
      } else {
        ## Resolve backslash escapes, then drop the enclosing quotes.
        $quoted =~ s/\\(.)/$1/gs;
        push @type, $name => substr ($quoted, 1, length ($quoted) - 2);
      }
    }

    require Whatpm::IMTChecker;
    my $ic = Whatpm::IMTChecker->new;
    $ic->{level} = $self->{level};
    $ic->check_imt (sub {
      $self->{onerror}->(@_, node => $attr);
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error',
                       level => $self->{level}->{must});
  }
}; # $HTMLIMTAttrChecker
754
## Checks a language tag attribute by means of |Whatpm::LangTag|'s
## RFC 3066 checker.  Errors are forwarded to |$self->{onerror}| with
## the attribute added as |node|.
##
## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
my $HTMLLanguageTagAttrChecker = sub {
  my ($self, $attr) = @_;
  my $tag = $attr->value;
  my $report = sub {
    $self->{onerror}->(@_, node => $attr);
  };

  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag ($tag, $report, $self->{level});
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
768
769 ## "A valid media query [MQ]"
## Media queries are not checked: every such attribute is reported as
## 'media query' at the "uncertain" level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my $level = $self->{level}->{uncertain};
  $self->{onerror}->(node => $attr, type => 'media query', level => $level);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
777
## Event handler content attributes are not checked: every such
## attribute is reported as 'event handler' at the "uncertain" level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my $level = $self->{level}->{uncertain};
  $self->{onerror}->(node => $attr, type => 'event handler', level => $level);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
789
## Records the |form| attribute as an ID reference to be resolved
## later: the value MUST be the ID of a |form| element.
my $HTMLFormAttrChecker = sub {
  my ($self, $attr) = @_;
  my $idref = ['form', $attr->value, $attr];
  push @{$self->{idref}}, $idref;

  ## ISSUE: <form id=""><input form=""> (empty ID)?
}; # $HTMLFormAttrChecker
800
## Records the |list| attribute as an ID reference to be resolved
## later: the value MUST be the ID of a |datalist| element.
my $ListAttrChecker = sub {
  my ($self, $attr) = @_;
  my $idref = ['datalist', $attr->value, $attr];
  push @{$self->{idref}}, $idref;

  ## TODO: Warn violation to control-dependent restrictions. For
  ## example, |<input type=url maxlength=10 list=a> <datalist
  ## id=a><option value=nonurlandtoolong></datalist>| should be
  ## warned.
}; # $ListAttrChecker
813
## Hands the |pattern| attribute value to |$self->{onsubdoc}| as a
## subdocument of media type |text/x-regexp-js|, with the attribute
## as its container node.
my $PatternAttrChecker = sub {
  my ($self, $attr) = @_;
  my %subdoc = (
    s => $attr->value,
    container_node => $attr,
    media_type => 'text/x-regexp-js',
    is_char_string => 1,
  );
  $self->{onsubdoc}->(\%subdoc);

  ## ISSUE: "value must match the Pattern production of ECMA 262's
  ## grammar" - no additional constraints (e.g. {n,m} then n>=m).

  ## TODO: Warn if @value does not match @pattern.
}; # $PatternAttrChecker
826
## Checks the |accept| attribute: a set of comma-separated tokens,
## compared ASCII case-insensitively, each of which is |audio/*|,
## |video/*|, |image/*| or an Internet media type with no parameter.
my $AcceptAttrChecker = sub {
  my ($self, $attr) = @_;

  (my $list = $attr->value) =~ tr/A-Z/a-z/; ## ASCII case-insensitive

  ## A set of comma-separated tokens.  The empty value is a single
  ## empty token.
  my @token = length $list ? split /,/, $list, -1 : ('');

  my %wildcard = map { ($_ => 1) } 'audio/*', 'video/*', 'image/*';
  my %seen;
  for my $token (@token) {
    $token =~ s/^[\x09\x0A\x0C\x0D\x20]+//;
    $token =~ s/[\x09\x0A\x0C\x0D\x20]+\z//;

    if ($seen{$token}++) {
      $self->{onerror}->(node => $attr,
                         type => 'duplicate token',
                         value => $token,
                         level => $self->{level}->{must});
      next;
    }

    next if $wildcard{$token};

    if (my ($type, $subtype) = $token =~ m[\A$IMTNoParameter\z]) {
      ## ISSUE: HTML5 references RFC 2046, but maybe HTML5 should
      ## define its own syntax citing RFC 4288.

      ## NOTE: Parameters not allowed.
      require Whatpm::IMTChecker;
      my $ic = Whatpm::IMTChecker->new;
      $ic->{level} = $self->{level};
      $ic->check_imt (sub {
        $self->{onerror}->(@_, node => $attr);
      }, $type, $subtype);
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'IMTnp:syntax error', ## TODOC: type
                         value => $token,
                         level => $self->{level}->{must});
    }
  }
}; # $AcceptAttrChecker
871
## Checks a form control's |name| attribute: the only constraint is
## that the value is not empty.
my $FormControlNameAttrChecker = sub {
  my ($self, $attr) = @_;

  my $name_length = length $attr->value;
  if (not $name_length) {
    $self->{onerror}->(node => $attr,
                       type => 'empty control name', ## TODOC: type
                       level => $self->{level}->{must});
  }

  ## NOTE: No uniqueness constraint.
}; # $FormControlNameAttrChecker
883
## Checks the |autofocus| attribute: it is a boolean attribute and
## may be specified only once, as tracked by
## |$self->{has_autofocus}|.
my $AutofocusAttrChecker = sub {
  my ($self, $attr) = @_;

  my $check_boolean = $GetHTMLBooleanAttrChecker->('autofocus');
  $check_boolean->(@_);

  if ($self->{has_autofocus}) {
    $self->{onerror}->(node => $attr,
                       type => 'duplicate autofocus', ## TODOC: type
                       level => $self->{level}->{must});
  }
  $self->{has_autofocus} = 1;
}; # $AutofocusAttrChecker
896
## Checks the |usemap| attribute, which MUST be a valid hash-name
## reference to a |map| element.  The name after the "#" is queued in
## |$self->{usemap}| together with the attribute.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  my $map_name = $attr->value;
  if (not $map_name =~ s/^#//) {
    $self->{onerror}->(node => $attr, type => 'hashref:syntax error',
                       level => $self->{level}->{must});
  } else {
    ## NOTE: |usemap="#"| is conforming, though it identifies no |map| element
    ## according to the "rules for parsing a hash-name reference" algorithm.
    ## The document is non-conforming anyway, since |<map name="">| (empty
    ## name) is non-conforming.
    push @{$self->{usemap}}, [$map_name => $attr];
  }
  ## NOTE: Space characters in hash-name references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
}; # $HTMLUsemapAttrChecker
914
915 ## Valid browsing context name
## A browsing context name must not be empty and must not begin with
## "_", which is reserved.
my $HTMLBrowsingContextNameAttrChecker = sub {
  my ($self, $attr) = @_;
  my $name = $attr->value;
  if (not length $name) {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  } elsif ($name =~ /^_/) {
    $self->{onerror}->(node => $attr, type => 'window name:reserved',
                       level => $self->{level}->{must},
                       value => $name);
  }
}; # $HTMLBrowsingContextNameAttrChecker
930
## Valid browsing context name or keyword
##
## Checker for attributes whose value must be a browsing context name
## or one of the keywords |_blank|, |_self|, |_parent|, and |_top|.
##
##   - A value starting with "_" that is not an ASCII case-insensitive
##     match for one of the keywords is reported as
##     'window name:reserved' (the reported |value| is the
##     ASCII-lowercased form).
##   - The empty string is reported as 'window name:empty'.
##   - Any other value is accepted.
##
## Both errors are MUST-level.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  if ($value =~ /^_/) {
    ## Fold with |tr| rather than |lc|.  |lc| applies Unicode case
    ## mapping to character strings, so that, for example, U+212A
    ## KELVIN SIGN becomes "k" and |_blan\x{212A}| would be accepted as
    ## if it were |_blank|.
    $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
    unless ({
      _blank => 1, _self => 1, _parent => 1, _top => 1,
    }->{$value}) {
      $self->{onerror}->(node => $attr,
                         type => 'window name:reserved',
                         level => $self->{level}->{must},
                         value => $value);
    }
  } elsif (length $value) {
    ## A browsing context name; nothing more to check.
  } else {
    $self->{onerror}->(node => $attr, type => 'window name:empty',
                       level => $self->{level}->{must});
  }
}; # $HTMLTargetAttrChecker
952
## Checker for attributes whose value is a group of selectors.
##
## A fresh |Whatpm::CSS::SelectorsParser| is configured with the
## pseudo-classes and pseudo-elements listed below and with the
## checker's |level| table, then asked to parse the attribute value.
## Errors raised by the parser are forwarded to the checker's |onerror|
## callback with |node => $attr| appended.
my $HTMLSelectorsAttrChecker = sub {
  my ($checker, $selectors_attr) = @_;

  ## ISSUE: Namespace resolution?

  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  my @pseudo_elements = qw/
    after before first-letter first-line
  /;

  my $selectors = $selectors_attr->value;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;
  @{$parser->{pseudo_class}}{@pseudo_classes} = (1) x @pseudo_classes;
  @{$parser->{pseudo_element}}{@pseudo_elements} = (1) x @pseudo_elements;

  $parser->{level} = $checker->{level};
  $parser->{onerror} = sub {
    $checker->{onerror}->(@_, node => $selectors_attr);
  };
  $parser->parse_string ($selectors);
}; # $HTMLSelectorsAttrChecker
980
## Checks a single character encoding name.
##
## Arguments, in order: the encoding name, the checker object, the
## attribute node against which errors are reported, and an optional
## flag requiring the encoding to be ASCII-compatible.
##
## Returns a two-item list: the entry of
## |$Message::Charset::Info::IANACharset| for the name (or a false
## value if there is none) and the ASCII-lowercased name.
##
## NOTE: This code is used for |charset=""| attributes, the |charset=|
## portion of |content=""| attributes, and |accept-charset=""|
## attributes.
my $HTMLCharsetChecker = sub ($$$;$) {
  my ($charset_value, $self, $attr, $ascii_compat) = @_;

  ## NOTE: Though the case-sensitivity of the |charset| attribute value
  ## is not explicitly spelled out in the HTML5 spec, the Character Set
  ## registry of IANA, which is referenced from the HTML5 spec, says
  ## that charset names are case-insensitive.
  $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive.

  require Message::Charset::Info;
  my $charset = $Message::Charset::Info::IANACharset->{$charset_value};

  ## All errors raised here share the same node and value.
  my $report = sub {
    my ($type, $level_name) = @_;
    $self->{onerror}->(node => $attr,
                       type => $type,
                       value => $charset_value,
                       level => $self->{level}->{$level_name});
  };

  ## ISSUE: What is "valid character encoding name"?  Syntactically
  ## valid?  Syntactically valid and registered?  What about x-charset
  ## names?
  $report->('charset:syntax error', 'must')
      unless Message::Charset::Info::is_syntactically_valid_iana_charset_name
          ($charset_value);

  ## Error type used whenever the name is not a registered one.
  my $unregistered_type = $charset_value =~ /^x-/
      ? 'charset:private' : 'charset:not registered';

  if (not $charset) {
    ## TODO: non-preferred-name error for these cases.
    ## NOTE: Whether this is an ASCII-compatible character encoding or
    ## not is unknown.
    $report->($unregistered_type, 'good');
  } else {
    ## ISSUE: What is "the preferred name for that encoding" (for a
    ## charset with no "preferred MIME name" label)?
    my $name_flags = $charset->{iana_names}->{$charset_value} || 0;
    my $preferred = Message::Charset::Info::PREFERRED_CHARSET_NAME ();
    my $registered = Message::Charset::Info::REGISTERED_CHARSET_NAME ();

    $report->('charset:not preferred', 'must')
        unless ($name_flags & $preferred) == $preferred;

    $report->($unregistered_type, 'good')
        unless ($name_flags & $registered) == $registered;

    if ($ascii_compat and
        not $charset->{category} &
            Message::Charset::Info::CHARSET_CATEGORY_ASCII_COMPAT ()) {
      $report->('charset:not ascii compat', 'must');
    }
  }

  return ($charset, $charset_value);
}; # $HTMLCharsetChecker
1069
## NOTE: "An ordered set of space-separated tokens" where "each token
## MUST be the preferred name of an ASCII-compatible character
## encoding".
##
## The attribute value is split on runs of HTML space characters and
## every non-empty token is passed to |$HTMLCharsetChecker| with the
## ASCII-compatibility flag set.
my $HTMLCharsetsAttrChecker = sub {
  my ($checker, $charsets_attr) = @_;

  ## ISSUE: "ordered set of space-separated tokens" is not defined.

  ## XXX
  ## ISSUE: Uniqueness is not enforced.

  for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $charsets_attr->value) {
    ## A leading space run yields one empty leading field.
    next unless length $token;
    $HTMLCharsetChecker->($token, $checker, $charsets_attr, 1);
  }

  ## ISSUE: Shift_JIS is ASCII-compatible?  What about ISO-2022-JP?
}; # $HTMLCharsetsAttrChecker
1089
## Checker for HTML4 color attributes.
##
## NOTE: HTML4 "color" or |%Color;|
##
## The value must be "#" followed by one or more hexadecimal digits, or
## one of the sixteen HTML4 color names, compared ASCII
## case-insensitively.  Anything else is reported as
## 'color:syntax error' at the |html4_fact| level.
my $HTMLColorAttrChecker = sub {
  my ($self, $attr) = @_;

  my $value = $attr->value;

  ## Fold ASCII letters only and then match case-sensitively.  Matching
  ## with |/i| would use Unicode case folding for character strings, so
  ## that e.g. U+212A KELVIN SIGN would match "k" and U+017F LATIN SMALL
  ## LETTER LONG S would match "s".
  $value =~ tr/A-Z/a-z/;

  if ($value !~ /\A(?>#[0-9a-f]+|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua)\z/) {
    $self->{onerror}->(node => $attr, type => 'color:syntax error',
                       level => $self->{level}->{html4_fact});
  }

  ## TODO: HTML4 has some guideline on usage of color.
}; # $HTMLColorAttrChecker
1104
## Checker shared by the |ref| and |template| attributes.
##
## Steps:
##   1. The value is checked by |$HTMLURIAttrChecker|.
##   2. A |ref| attribute whose owner element has no |template|
##      attribute is reported as 'attribute not allowed'.
##   3. The value is resolved against the document URI.  If, ignoring
##      the fragment, it refers to the document itself:
##        - with a fragment, |[fragment => attribute node]| is pushed
##          onto |$self->{ref}| or |$self->{template}|, keyed by the
##          attribute name;
##        - without a fragment, a |template| attribute is reported as
##          'template:not template' unless the document element is an
##          HTML |datatemplate| element.
##      References to other documents are not checked.
my $HTMLRefOrTemplateAttrChecker = sub {
  my ($checker, $ref_attr) = @_;
  $HTMLURIAttrChecker->(@_);

  my $attr_name = $ref_attr->name;

  if ($attr_name eq 'ref' and
      not $ref_attr->owner_element->has_attribute_ns (undef, 'template')) {
    $checker->{onerror}->(node => $ref_attr, type => 'attribute not allowed',
                          level => $checker->{level}->{must});
  }

  require Message::URL;
  my $doc = $ref_attr->owner_document;
  my $doc_uri = $doc->document_uri;
  my $target_uri = Message::URL->new_abs ($ref_attr->value, $doc_uri);
  my $target_without_fragment = $target_uri->clone;
  $target_without_fragment->uri_fragment (undef);

  my $is_same_document = defined $doc_uri
      ? $doc_uri eq $target_without_fragment
      : $target_without_fragment eq '';

  unless ($is_same_document) {
    ## An external document is referenced.

    ## NOTE: Maybe the same-policy restriction should be posed to the
    ## referenced document, but the spec did not define such
    ## requirements and the entire feature has already been dropped
    ## from the spec anyway.

    ## XXXresource:
    ## - The document MUST be an HTML or XML document.
    ## - If there is a fragment identifier, it MUST point a part of the
    ##   doc.
    ## - If the attribute is |template|, the pointed part MUST be a
    ##   |datatemplate| element.
    ## - If no fragment identifier is specified, the root element MUST
    ##   be a |datatemplate| element when the attribute is |template|.
    return;
  }

  my $fragment = $target_uri->uri_fragment;
  if (defined $fragment) {
    ## Recorded under the attribute name, not resolved here.
    push @{$checker->{$attr_name}}, [$fragment => $ref_attr];
    return;
  }

  ## The document itself is referenced.  Only |template| has a
  ## requirement in this case.
  return unless $attr_name eq 'template';

  my $root = $doc->document_element;
  if ($root) {
    my $root_ns = $root->namespace_uri;
    return if defined $root_ns and $root_ns eq $HTML_NS and
        $root->manakai_local_name eq 'datatemplate';
  }

  $checker->{onerror}->(node => $ref_attr, type => 'template:not template',
                        level => $checker->{level}->{must});
}; # $HTMLRefOrTemplateAttrChecker
1164
## Checker for the |repeat-min|, |repeat-max|, and |repeat-start|
## attributes.
##
## If the attribute is in a namespace and its owner element is in the
## HTML namespace, 'attribute not allowed' is reported.  The value is
## then checked by the non-negative integer checker obtained from
## |$GetHTMLNonNegativeIntegerAttrChecker|.
my $HTMLRepeatIndexAttrChecker = sub {
  my ($self, $attr) = @_;

  if (defined $attr->namespace_uri) {
    my $oe = $attr->owner_element;
    my $oe_nsuri = $oe->namespace_uri;
    ## Both conditions must hold, as in the |repeat-template| checker.
    ## With |or|, the error was raised for every owner element that has
    ## a namespace, and an undefined namespace URI was compared with
    ## |eq|.
    if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  }

  $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
}; # $HTMLRepeatIndexAttrChecker
1179
## Checker for the |placeholder| attribute: a value containing U+000D
## or U+000A is reported as a MUST-level 'newline in value' error.
my $PlaceholderAttrChecker = sub {
  my ($checker, $placeholder_attr) = @_;

  my $text = $placeholder_attr->value;

  ## |tr| without a replacement list only counts the characters.
  my $newline_count = ($text =~ tr/\x0D\x0A//);
  return unless $newline_count;

  $checker->{onerror}->(node => $placeholder_attr,
                        type => 'newline in value', ## TODOC: type
                        level => $checker->{level}->{must});
}; # $PlaceholderAttrChecker
1188
## Checkers for attributes in the null namespace, keyed by attribute
## local name.
##
## Each checker is invoked as |$checker->($self, $attr, $item,
## $element_state)| and reports problems through |$self->{onerror}|.
my $HTMLAttrChecker = {
  ## Each token must be exactly one character long and must not be
  ## repeated.
  accesskey => sub {
    my ($self, $attr) = @_;

    ## "Ordered set of unique space-separated tokens"

    my %keys;
    my @keys = grep {length} split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

    for my $key (@keys) {
      unless ($keys{$key}) {
        $keys{$key} = 1;
        if (length $key != 1) {
          $self->{onerror}->(node => $attr, type => 'char:syntax error',
                             value => $key,
                             level => $self->{level}->{must});
        }
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $key,
                           level => $self->{level}->{must});
      }
    }
  }, # accesskey

  ## TODO: aria-* ## TODO: svg:*/@aria-* [HTML5ROLE] -> [STATES]

  ## Every attribute node seen for an ID is collected in
  ## |$self->{id}->{$value}|.  The ID type of the first occurrence is
  ## stored in |$self->{id_type}|.
  id => sub {
    my ($self, $attr, $item, $element_state) = @_;
    my $value = $attr->value;
    if (length $value > 0) {
      if ($self->{id}->{$value}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID',
                           level => $self->{level}->{must});
        push @{$self->{id}->{$value}}, $attr;
      } else {
        $self->{id}->{$value} = [$attr];
        $self->{id_type}->{$value} = $element_state->{id_type} || '';
      }
      if ($value =~ /[\x09\x0A\x0C\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID',
                           level => $self->{level}->{must});
      }
    } else {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value',
                         level => $self->{level}->{must});
    }
  },

  title => sub {}, ## NOTE: No conformance criteria

  ## The empty string is accepted; any other value is handed to
  ## |Whatpm::LangTag->check_rfc3066_language_tag|.
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    if ($value eq '') {
      #
    } else {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
        $self->{onerror}->(@_, node => $attr);
      }, $self->{level});
    }
    ## ISSUE: RFC 4646 (3066bis)?

    ## TODO: test data

    ## NOTE: Inconsistency between |lang| and |xml:lang| attributes are
    ## non-conforming.  Such errors are detected by the checkers of
    ## |{}xml:lang| and |{xml}:lang| attributes.
  },

  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),

  ## The first occurrence of each token is recorded in
  ## |$self->{return}->{class}|; repeated tokens are errors.
  class => sub {
    my ($self, $attr) = @_;

    ## NOTE: "Unordered set of unique space-separated tokens".

    my %word;
    for my $word (grep {length $_}
                  split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
      unless ($word{$word}) {
        $word{$word} = 1;
        push @{$self->{return}->{class}->{$word}||=[]}, $attr;
      } else {
        $self->{onerror}->(node => $attr, type => 'duplicate token',
                           value => $word,
                           level => $self->{level}->{must});
      }
    }
  },

  contenteditable => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),

  ## The value is queued in |$self->{idref}| as a reference to a |menu|
  ## element; nothing is resolved here.
  contextmenu => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    push @{$self->{idref}}, ['menu', $value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },

  hidden => $GetHTMLBooleanAttrChecker->('hidden'),
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
  ref => $HTMLRefOrTemplateAttrChecker,

  ## Not allowed when |$self->{flag}->{in_rule}| is unset.  When it is
  ## set, the attribute is not allowed on an HTML |nest| element, nor
  ## on an HTML |rule| element whose state has no |in_rule_original|.
  registrationmark => sub {
    my ($self, $attr, $item, $element_state) = @_;

    ## NOTE: Any value is conforming.

    if ($self->{flag}->{in_rule}) {
      my $el = $attr->owner_element;
      my $ln = $el->manakai_local_name;
      if ($ln eq 'nest' or
          ($ln eq 'rule' and not $element_state->{in_rule_original})) {
        my $nsuri = $el->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    } else {
      $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## The value must be |template| or an optionally negative decimal
  ## integer.
  repeat => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      ## Both conditions must hold, as in the |repeat-template| checker
      ## below.  With |or|, the error was raised for every owner element
      ## that has a namespace, and an undefined namespace URI was
      ## compared with |eq|.
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    my $value = $attr->value;
    if ($value eq 'template') {
      #
    } elsif ($value =~ /\A-?[0-9]+\z/) {
      #
    } else {
      $self->{onerror}->(node => $attr, type => 'repeat:syntax error',
                         level => $self->{level}->{must});
    }

    ## NOTE: Where this attribute is allowed to set was not clearly
    ## defined in Web Forms 2.0.  The spec said that "Repetition
    ## templates may occur anywhere", which might imply the attribute
    ## can be specified to any element, but its primary implication
    ## would be that the template can be appear in any hierarchy in
    ## the document structure.  Anyway, the feature has been removed
    ## from the HTML5 spec.
  },

  'repeat-min' => $HTMLRepeatIndexAttrChecker,
  'repeat-max' => $HTMLRepeatIndexAttrChecker,
  'repeat-start' => $HTMLRepeatIndexAttrChecker,

  ## Only the namespace of the attribute is checked; any value is
  ## accepted.
  'repeat-template' => sub {
    my ($self, $attr) = @_;

    if (defined $attr->namespace_uri) {
      my $oe = $attr->owner_element;
      my $oe_nsuri = $oe->namespace_uri;
      if (defined $oe_nsuri and $oe_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## NOTE: In the Web Forms 2.0 specification, this attribute had no
    ## author requirement.  In addition, though the spec said that
    ## repetition blocks MAY have this attribute specified, it did not
    ## explicitly prohibit the attribute specified on an element that
    ## is not a repetition block.  In anyway, the repetition template
    ## feature has been removed from the HTML5 specification.
  },

  ## TODO: role [HTML5ROLE] ## TODO: global @role [XHTML1ROLE]

  spellcheck => $GetHTMLEnumeratedAttrChecker->({
    true => 1, false => 1, '' => 1,
  }),

  ## The value is handed to |$self->{onsubdoc}| as a character string
  ## with the media type |text/x-css-inline|.
  style => sub {
    my ($self, $attr) = @_;

    $self->{onsubdoc}->({s => $attr->value,
                         container_node => $attr,
                         media_type => 'text/x-css-inline',
                         is_char_string => 1});

    ## NOTE: "... MUST still be comprehensible and usable if those
    ## attributes were removed" is a semantic requirement, it cannot
    ## be tested.
  },

  tabindex => $HTMLIntegerAttrChecker,
  template => $HTMLRefOrTemplateAttrChecker,

  ## The |xml:lang| attribute in the null namespace, which is
  ## different from the |lang| attribute in the XML's namespace.
  'xml:lang' => sub {
    my ($self, $attr) = @_;

    if ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(type => 'in HTML:xml:lang',
                         level => $self->{level}->{info},
                         node => $attr);
      ## NOTE: This is not an error, but the attribute will be ignored.
    } else {
      $self->{onerror}->(type => 'in XML:xml:lang',
                         level => $self->{level}->{html5_no_may},
                         node => $attr);
      ## TODO: We need to add test for this error.
    }

    ## The attribute is allowed only together with a |lang| attribute
    ## that has the same value, compared ASCII case-insensitively.
    my $lang_attr = $attr->owner_element->get_attribute_node_ns
        (undef, 'lang');
    if ($lang_attr) {
      my $lang_attr_value = $lang_attr->value;
      $lang_attr_value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      my $value = $attr->value;
      $value =~ tr/A-Z/a-z/; ## ASCII case-insensitive
      if ($lang_attr_value ne $value) {
        $self->{onerror}->(type => 'xml:lang ne lang',
                           level => $self->{level}->{must},
                           node => $attr);
      }
    } else {
      $self->{onerror}->(type => 'xml:lang not allowed',
                         level => $self->{level}->{must},
                         node => $attr);
      ## TODO: We need to add test for <x {xml}:lang {}xml:lang>.
    }
  },

  ## The |xmlns| attribute in the null namespace, which is different
  ## from the |xmlns| attribute in the XMLNS namespace.
  xmlns => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value eq $HTML_NS) {
      $self->{onerror}->(node => $attr, type => 'invalid attribute value',
                         level => $self->{level}->{must});
      ## TODO: Should be new "bad namespace" error?
    }
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:xmlns',
                         level => $self->{level}->{must});
      ## TODO: Test
    }

    ## TODO: Should be resolved?
    push @{$self->{return}->{uri}->{$value} ||= []},
        {node => $attr, type => {namespace => 1}};
  },
};
1441
## Feature status of attributes in the null namespace, keyed by
## attribute local name.  It is passed as the status table to
## |$GetHTMLAttrsChecker| by |%HTMLChecker|.
my %HTMLAttrStatus = (
  accesskey         => FEATURE_HTML5_FD,
  class             => FEATURE_HTML5_LC,
  contenteditable   => FEATURE_HTML5_REC,
  contextmenu       => FEATURE_HTML5_WD,
  dir               => FEATURE_HTML5_REC,
  draggable         => FEATURE_HTML5_LC,
  hidden            => FEATURE_HTML5_LC,
  id                => FEATURE_HTML5_REC,
  irrelevant        => FEATURE_HTML5_DROPPED,
  lang              => FEATURE_HTML5_REC,
  ref               => FEATURE_HTML5_DROPPED,
  registrationmark  => FEATURE_HTML5_DROPPED,
  repeat            => FEATURE_WF2,
  'repeat-max'      => FEATURE_WF2,
  'repeat-min'      => FEATURE_WF2,
  'repeat-start'    => FEATURE_WF2,
  'repeat-template' => FEATURE_WF2,
  role              => 0,
  spellcheck        => FEATURE_HTML5_WD,
  style             => FEATURE_HTML5_REC,
  tabindex          => FEATURE_HTML5_DEFAULT,
  template          => FEATURE_HTML5_DROPPED,
  title             => FEATURE_HTML5_REC,
  xmlns             => FEATURE_HTML5_WD,
);
1468
## Feature status table combining HTML5, XHTML Modularization 1.0
## (|FEATURE_M12N10_REC|), and RDFa statuses, keyed by attribute local
## name.
my %HTMLM12NCommonAttrStatus = (
  ## Attributes whose status is the RDFa one only.
  (map { ($_ => FEATURE_RDFA_REC) } qw/
    about content datatype href property rel resource rev typeof
  /),
  instanceof => FEATURE_RDFA_LC_DROPPED,

  class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  dir   => FEATURE_HTML5_REC,
  id    => FEATURE_HTML5_REC,
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_M12N10_REC,
  style => FEATURE_HTML5_REC,
  title => FEATURE_HTML5_REC,

  ## Event handler attributes.
  (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw/
    onclick ondblclick onmousedown onmouseup onmouseover onmousemove
    onmouseout onkeypress onkeydown onkeyup
  /),
);
1498
## Feature status table combining HTML5 and XHTML2 (|FEATURE_XHTML2_ED|)
## statuses, keyed by attribute local name.  Commented-out names are
## not part of the table.
my %XHTML2CommonAttrStatus = (
  ## Core
  class  => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  id     => FEATURE_HTML5_REC,
  #xml:id
  layout => FEATURE_XHTML2_ED,
  title  => FEATURE_HTML5_REC,

  ## Hypertext
  (map { ($_ => FEATURE_XHTML2_ED) } qw/
    cite href hreflang hrefmedia hreftype nextfocus prevfocus target
  /),
  #xml:base

  ## I18N
  #xml:lang

  ## Bi-directional
  dir => FEATURE_HTML5_REC,

  ## Edit
  (map { ($_ => FEATURE_XHTML2_ED) } qw/edit datetime/),

  ## Embedding
  (map { ($_ => FEATURE_XHTML2_ED) } qw/encoding src srctype/),

  ## Image Map
  (map { ($_ => FEATURE_XHTML2_ED) } qw/usemap ismap shape coords/),

  ## Media
  media => FEATURE_XHTML2_ED,

  ## Metadata
  (map { ($_ => FEATURE_XHTML2_ED) } qw/
    about content datatype instanceof property rel resource rev
  /),

  ## Role
  role => FEATURE_XHTML2_ED,

  ## Style
  style => FEATURE_HTML5_REC,
);
1558
## Union of |%HTMLM12NCommonAttrStatus| and |%XHTML2CommonAttrStatus|.
##
## The two tables are merged first (later pairs win); the explicit
## entries that follow then replace the merged value for their names.
my %HTMLM12NXHTML2CommonAttrStatus = (
  %HTMLM12NCommonAttrStatus,
  %XHTML2CommonAttrStatus,

  ## Names carrying both the RDFa and the XHTML2 status.
  (map { ($_ => FEATURE_RDFA_REC | FEATURE_XHTML2_ED) } qw/
    about content datatype property rel resource rev
  /),
  instanceof => FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED,
  href       => FEATURE_RDFA_REC,
  typeof     => FEATURE_RDFA_REC,

  class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  dir   => FEATURE_HTML5_REC,
  id    => FEATURE_HTML5_REC,
  #style => FEATURE_HTML5_WD | FEATURE_XHTMLBASIC11_CR_DEPRECATED |
  #    FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  style => FEATURE_HTML5_REC,
  title => FEATURE_HTML5_REC,
);
1581
## Registers the event handler attributes: every listed name gets
## |$HTMLEventHandlerAttrChecker| as its checker in |$HTMLAttrChecker|
## and the status of its group in |%HTMLAttrStatus|.
for my $group (
  [FEATURE_HTML5_DEFAULT, [qw/
    onabort onblur onchange onclick oncontextmenu
    ondblclick ondrag ondragend ondragenter ondragleave ondragover
    ondragstart ondrop onerror onfocus onkeydown onkeypress
    onkeyup onload onmousedown onmousemove onmouseout
    onmouseover onmouseup onmousewheel onscroll onselect
    onsubmit
  /]],
  [FEATURE_HTML5_DROPPED, [qw/
    onbeforeunload onhashchange onresize onstorage onunload
    ondataunavailable
    onmessage
  /]],
) {
  my ($status, $attr_names) = @$group;
  for my $attr_name (@$attr_names) {
    $HTMLAttrChecker->{$attr_name} = $HTMLEventHandlerAttrChecker;
    $HTMLAttrStatus{$attr_name} = $status;
  }
}
1602
## Registers checkers and statuses for attributes that are themselves
## in the HTML namespace, in the |$AttrChecker| and |$AttrStatus|
## tables.

## NOTE: Non-standard global attributes in the HTML namespace.
$AttrChecker->{$HTML_NS}->{''} = sub {}; # no syntactical checks
$AttrStatus->{$HTML_NS}->{''} = 0; # disallowed and not part of any standard

$AttrStatus->{$HTML_NS}->{active} = FEATURE_HTML5_DROPPED;

## The repetition attributes reuse the null-namespace checkers.
for my $attr_name (qw/repeat repeat-max repeat-min repeat-start
                      repeat-template/) {
  $AttrChecker->{$HTML_NS}->{$attr_name} = $HTMLAttrChecker->{$attr_name};
  $AttrStatus->{$HTML_NS}->{$attr_name} = FEATURE_WF2;
}

$AttrStatus->{$HTML_NS}->{$_} = FEATURE_RDFA_REC | FEATURE_XHTML2_ED
    for qw/about content datatype property rel resource rev/;
$AttrStatus->{$HTML_NS}->{instanceof}
    = FEATURE_RDFA_LC_DROPPED | FEATURE_XHTML2_ED;
$AttrStatus->{$HTML_NS}->{typeof} = FEATURE_RDFA_REC;
$AttrStatus->{$HTML_NS}->{role} = FEATURE_ROLE_LC;

$AttrStatus->{$HTML_NS}->{$_} = FEATURE_XHTML2_ED
    for qw/cite coords datetime edit encoding href hreflang hrefmedia
           hreftype ismap layout media nextfocus prevfocus shape src
           srctype style target usemap/;

$AttrStatus->{$HTML_NS}->{$_} = FEATURE_M12N11_LC | FEATURE_XHTML2_ED
    for qw/class dir id title/;

$AttrStatus->{$HTML_NS}->{$_} = FEATURE_M12N11_LC
    for qw/onclick ondblclick onmousedown onmouseup onmouseover
           onmousemove onmouseout onkeypress onkeydown onkeyup/;
1631
## Checker and feature status used by |$GetHTMLAttrsChecker| for
## |data-*| attributes.  The checker performs no check.
##
## NOTE: "Authors should ... when the attributes are ignored and
## any associated CSS dropped, the page is still usable." (semantic
## constraint.)
my $HTMLDatasetAttrChecker = sub {}; # $HTMLDatasetAttrChecker

my $HTMLDatasetAttrStatus = FEATURE_HTML5_LC;
1639
## Builds a |check_attrs| handler for an element.
##
## Arguments: a hash reference of element-specific attribute checkers
## and a hash reference of element-specific attribute statuses, both
## keyed by attribute local name.
##
## The returned handler visits every attribute of |$item->{node}| and
## picks a checker and a status for it:
##
##   - For a null-namespace attribute, a lowercase |data-*| name uses
##     the dataset checker and status.  Any other name uses the
##     element-specific checker, falling back to |$HTMLAttrChecker|,
##     and the element-specific status.
##   - Whatever is still false afterwards is looked up in
##     |$AttrChecker| and |$AttrStatus| by namespace and local name,
##     falling back to the |''| entry of the namespace.
##
## The checker, if any, is run.  Finally the status is passed to
## |$self->_attr_status_info|.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;

  return sub {
    my ($self, $item, $element_state) = @_;

    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' if not defined $ns;
      my $ln = $attr->manakai_local_name;
      my $in_null_ns = $ns eq '';

      my ($checker, $status);
      if ($in_null_ns) {
        my $is_dataset_name = ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                               $ln !~ /[A-Z]/);
        ($checker, $status) = $is_dataset_name
            ? ($HTMLDatasetAttrChecker, $HTMLDatasetAttrStatus)
            : ($element_specific_checker->{$ln} || $HTMLAttrChecker->{$ln},
               $element_specific_status->{$ln});
      }

      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      if (length $ns and not defined $status) {
        $status = FEATURE_ALLOWED;
      }

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif (not $in_null_ns or $element_specific_status->{$ln}) {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in
        ## the spec
      }
      ## A null-namespace attribute with neither a checker nor an
      ## element-specific status raises no error at this point.

      $self->_attr_status_info ($attr, $status);
    }
  };
}; # $GetHTMLAttrsChecker
1681
## Base element checker for HTML elements.  It starts from
## |%Whatpm::ContentChecker::AnyChecker| and replaces |check_start| and
## |check_attrs|.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,

  ## Marks the |template| and |ref| attributes as resource URIs in the
  ## element state.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },

  ## No element-specific checkers; statuses from |%HTMLAttrStatus|.
  check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
);
1692
## Element checker for elements with an empty content model, based on
## |%HTMLChecker|.
my %HTMLEmptyChecker = (
  %HTMLChecker,

  ## A child element listed in |$self->{plus_elements}| is accepted.
  ## Every other child element is an error.  The 'minus' error type is
  ## used when the child is listed in |$self->{minus_elements}| and
  ## |$IsInHTMLInteractiveContent| returns true for it.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      return;
    } else {
      $error_type = 'element not allowed:empty';
    }

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must});
  },

  ## Text flagged as significant by the caller is an error.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{level}->{must});
  },
);
1720
## Element checker for elements whose content model is text only, based
## on |%HTMLChecker|.
my %HTMLTextChecker = (
  %HTMLChecker,

  ## A child element listed in |$self->{plus_elements}| is accepted.
  ## Every other child element is an error.  The 'minus' error type is
  ## used when the child is listed in |$self->{minus_elements}| and
  ## |$IsInHTMLInteractiveContent| returns true for it.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      return;
    } else {
      $error_type = 'element not allowed:text';
    }

    $self->{onerror}->(node => $child_el, type => $error_type,
                       level => $self->{level}->{must});
  },
);
1739
## Element checker for elements whose content model is flow content,
## based on |%HTMLChecker|.
##
## |$element_state->{has_non_style}| records that something other than
## a |style| child has been seen.
my %HTMLFlowContentChecker = (
  %HTMLChecker,

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## All errors raised here are MUST-level errors on the child.
    my $report = sub {
      my ($error_type) = @_;
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $report->('element not allowed:minus');
      return;
    }

    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    if ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## A |style| child must have a |scoped| attribute and must come
      ## before any non-|style| content.
      $report->('element not allowed:flow style')
          if $element_state->{has_non_style} or
             not $child_el->has_attribute_ns (undef, 'scoped');
      return;
    }

    if ($HTMLFlowContent->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
      return;
    }

    $element_state->{has_non_style} = 1;
    $report->('element not allowed:flow');
  },

  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },

  ## Propagates |has_significant| to the real parent's state.  An
  ## element without significant content that is not transparent gets a
  ## SHOULD-level 'no significant content' error.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: A modified copy of the code below is in |datagrid| checker.
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
      return;
    }

    return if $item->{transparent};

    $self->{onerror}->(node => $item->{node},
                       level => $self->{level}->{should},
                       type => 'no significant content');
  },
);
1788
## Element checker for elements whose content model is phrasing
## content, based on |%HTMLChecker|.  |check_end| is shared with
## |%HTMLFlowContentChecker|.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,

  ## A child element listed in |$self->{plus_elements}| or in
  ## |$HTMLPhrasingContent| is accepted.  Every other child element is
  ## an error.  The 'minus' error type is used when the child is listed
  ## in |$self->{minus_elements}| and |$IsInHTMLInteractiveContent|
  ## returns true for it.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      return;
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      return;
    } else {
      $error_type = 'element not allowed:phrasing';
    }

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must});
  },

  check_end => $HTMLFlowContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between flow and phrasing content checkers are
  ## |check_child_element| and |check_child_text|.
);
1814
## Transparent elements are checked with a copy of the flow content
## checker.
## ISSUE: Significant content rule should be applied to transparent
## element with parent?
my %HTMLTransparentChecker = (%HTMLFlowContentChecker);

our ($Element, $ElementDefault);

## Definition stored under the |''| local name of the HTML namespace:
## a plain copy of |%HTMLChecker|.
$Element->{$HTML_NS}->{''} = {%HTMLChecker};
1825
## The |html| element (flagged |is_root|).  Its element children must
## be one |head| followed by one |body|.  Progress through that
## sequence is recorded in |$element_state->{phase}|, which takes the
## values 'before head', 'after head' and 'after body'.
$Element->{$HTML_NS}{html} = {
  status => FEATURE_HTML5_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    version => sub {
      ## NOTE: HTML4 prose describes this as a "cdata" attribute.
      ## The DTDs of various HTML versions declare the attribute as
      ## |#FIXED|, but because it is unknown what kind of check should
      ## be done, this conformance checker deliberately does not check
      ## the attribute value.
    },
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    manifest => FEATURE_HTML5_WD,
    sdaform => FEATURE_HTML20_RFC,
    version => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),

  ## Initialises the phase and marks the URL-valued attributes
  ## |manifest|, |template| and |ref| as of type "resource".
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';

    for my $attr_name ('manifest', 'template', 'ref') {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }
  },

  ## Validates one child element against the current phase and
  ## advances the phase when a |head| or |body| is accepted.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Reports a MUST-level error of the given type on the child.
    my $reject = sub {
      my ($type, @extra) = @_;
      $self->{onerror}->(node => $child_el,
                         type => $type,
                         @extra,
                         level => $self->{level}->{must});
    };
    ## True if the child is the HTML element with the given local name.
    my $is_html_child = sub {
      return ($child_nsuri eq $HTML_NS and $child_ln eq $_[0]);
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $reject->('element not allowed:minus');
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to check.
    } elsif ($element_state->{phase} eq 'before head') {
      if ($is_html_child->('head')) {
        $element_state->{phase} = 'after head';
      } elsif ($is_html_child->('body')) {
        ## A |body| with no preceding |head|: report the missing
        ## |head| and carry on as if the |body| had been accepted.
        $reject->('ps element missing', text => 'head');
        $element_state->{phase} = 'after body';
      } else {
        $reject->('element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after head') {
      if ($is_html_child->('body')) {
        $element_state->{phase} = 'after body';
      } else {
        $reject->('element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after body') {
      ## Nothing may follow the |body|.
      $reject->('element not allowed');
    } else {
      die "check_child_element: Bad |html| phase: $element_state->{phase}";
    }
  },

  ## Only insignificant text is allowed directly inside |html|.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },

  ## Reports whichever of |head| / |body| has not been seen, then
  ## delegates to the generic HTML |check_end|.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $report_missing = sub {
      my $missing_name = shift;
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => $missing_name,
                         level => $self->{level}->{must});
    };

    if ($element_state->{phase} eq 'before head') {
      $report_missing->('head');
      $report_missing->('body');
    } elsif ($element_state->{phase} eq 'after head') {
      $report_missing->('body');
    } elsif ($element_state->{phase} ne 'after body') {
      die "check_end: Bad |html| phase: $element_state->{phase}";
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1930
## The |head| element.  Its element children must be metadata content,
## with exactly one |title| and no |style| element carrying a |scoped|
## attribute.  From the first child element until |check_end|,
## |$self->{flag}->{in_head}| is set to true; |check_end| then puts
## back the value that was in effect before this |head|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    profile => $HTMLSpaceURIsAttrChecker, ## NOTE: MUST be profile URIs.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
    dir => FEATURE_HTML5_REC,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),

  ## Validates one child element of |head| and raises the |in_head|
  ## flag.  Arguments: the checker, the item being checked, the child
  ## element with its namespace URI and local name, the transparency
  ## flag of the child, and the per-element state hash.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is accepted.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{level}->{must});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## |style scoped| is not allowed as a child of |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.

      ## TODO: |form| MUST be empty and in XML [WF2].
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{level}->{must});
    }

    ## Save the outer value of the |in_head| flag only once.  This
    ## callback runs for every child element; saving unconditionally
    ## would, from the second child on, record the 1 assigned just
    ## below, and |check_end| would then leave the flag set after the
    ## |head| element has ended.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },

  ## Only insignificant text is allowed directly inside |head|.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## Reports a missing |title|, restores the |in_head| flag and
  ## delegates to the generic HTML |check_end|.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'title',
                         level => $self->{level}->{must});
    }

    ## Restore the flag only if it was actually saved (i.e. the |head|
    ## had at least one child element); otherwise it was never touched.
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
2005
## The |title| element: built on the text checker, with no attribute
## checkers of its own beyond those supplied by |$GetHTMLAttrsChecker|.
$Element->{$HTML_NS}{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_HTML2X_RFC,
      dir => FEATURE_HTML5_REC,
      id => FEATURE_HTML5_REC,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
};
2019
## The |base| element: an empty element of which only one may occur
## per checker run (tracked in |$self->{has_base}|).  It needs at
## least one of |href| and |target|, and must not come after a URL
## attribute (|href|) or a hyperlink element (|target|).
$Element->{$HTML_NS}{base} = {
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    ## Reports a MUST-level error of the given type on the element.
    my $complain = sub {
      my $error_type = shift;
      $self->{onerror}->(node => $base_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    };

    ## A second |base| is an error; the first one sets the marker.
    if ($self->{has_base}) {
      $complain->('element not allowed:base');
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely,
      ## though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming
      ## as an exception.
      $complain->('basehref after URL attribute');
    }

    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s
      ## content model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $complain->('basetarget after hyperlink');
    }

    ## At least one of the two attributes is required.
    unless ($has_href or $has_target) {
      $complain->('attribute missing:href|target');
    }

    $element_state->{uri_info}{href}{type}{base} = 1;

    ## Finally run the ordinary per-attribute checks.
    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_REC,
      target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    });
    return $attrs_checker->($self, $item, $element_state);
  },
};
2084
## The |link| element: an empty element.  After the per-attribute
## checks it requires |href| and |rel|, allows |sizes| only together
## with the |icon| link type, and requires a non-empty |title| on
## alternate style sheets.
$Element->{$HTML_NS}{link} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Set by the |sizes| checker below when the attribute is present.
    my $sizes_attr;

    my $attrs_checker = $GetHTMLAttrsChecker->({
      charset => sub {
        ## Arguments are passed on unchanged after the attribute value.
        $HTMLCharsetChecker->($_[1]->value, @_);
      },
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      sizes => sub {
        my ($self, $attr) = @_;
        $sizes_attr = $attr;

        ## Each space-separated token must be unique and be either
        ## |any| or |WIDTHxHEIGHT| with no leading zeros.
        my %seen;
        for my $token (grep { length }
                       split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value) {
          if ($seen{$token}++) {
            $self->{onerror}->(node => $attr, type => 'duplicate token',
                               value => $token,
                               level => $self->{level}->{must});
          } elsif ($token eq 'any' or
                   $token =~ /\A[1-9][0-9]*x[1-9][0-9]*\z/) {
            ## Valid token.
          } else {
            $self->{onerror}->(node => $attr,
                               type => 'sizes:syntax error',
                               value => $token,
                               level => $self->{level}->{must});
          }
        }
      },
      target => $HTMLTargetAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics here, it is
      ## syntactically the same as the global |title| attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      charset => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
      ## NOTE: The |charset| attribute had been part of the HTML5 spec
      ## though it had been commented out.
      href => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      media => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      methods => FEATURE_HTML20_RFC,
      rel => FEATURE_HTML5_LC | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      sizes => FEATURE_HTML5_LC,
      target => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      # title: HTML5_WD | HTML5_LC | ...
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    });
    $attrs_checker->($self, $item, $element_state);

    my $link_el = $item->{node};

    ## Reports a missing required attribute on the element.
    my $missing_attr = sub {
      my $attr_name = shift;
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing',
                         text => $attr_name,
                         level => $self->{level}->{must});
    };

    ## |href| is required; with a hyperlink link type the element
    ## also counts as a hyperlink element.
    if ($link_el->has_attribute_ns (undef, 'href')) {
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    } else {
      $missing_attr->('href');
    }

    ## |rel| is required.
    $missing_attr->('rel') unless $link_el->has_attribute_ns (undef, 'rel');

    ## |sizes| is only allowed with the |icon| link type.
    if ($sizes_attr and not $element_state->{link_rel}->{icon}) {
      $self->{onerror}->(node => $sizes_attr,
                         type => 'attribute not allowed',
                         level => $self->{level}->{must});
    }

    ## An alternate style sheet needs a non-empty |title|.
    if ($element_state->{link_rel}->{alternate} and
        $element_state->{link_rel}->{stylesheet}) {
      my $title_attr = $link_el->get_attribute_node_ns (undef, 'title');
      if (not $title_attr) {
        $missing_attr->('title');
      } elsif ($title_attr->value eq '') {
        $self->{onerror}->(node => $title_attr,
                           type => 'empty style sheet title',
                           level => $self->{level}->{must});
      }
    }
  },
};
2189
## The |meta| element: an empty element.  |check_attrs| (1) runs the
## per-attribute checkers while remembering the |name|, |http-equiv|,
## |charset| and |content| attribute nodes, (2) checks which
## combinations of those four attributes are present, (3) checks the
## pragma named by |http-equiv|, and (4) checks the |charset|
## attribute.
2190 $Element->{$HTML_NS}->{meta} = {
2191 status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2192 %HTMLEmptyChecker,
2193 check_attrs => sub {
2194 my ($self, $item, $element_state) = @_;
2195 my $name_attr;
2196 my $http_equiv_attr;
2197 my $charset_attr;
2198 my $content_attr;
## -- Step 1: walk over every attribute, picking a checker and a
## status value for each.
2199 for my $attr (@{$item->{node}->attributes}) {
2200 my $attr_ns = $attr->namespace_uri;
2201 $attr_ns = '' unless defined $attr_ns;
2202 my $attr_ln = $attr->manakai_local_name;
2203 my $checker;
2204 my $status;
2205 if ($attr_ns eq '') {
2206 $status = {
2207 %HTMLAttrStatus,
2208 %XHTML2CommonAttrStatus,
2209 charset => FEATURE_HTML5_WD,
2210 content => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
2211 dir => FEATURE_HTML5_REC,
2212 'http-equiv' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2213 id => FEATURE_HTML5_REC,
2214 lang => FEATURE_HTML5_REC,
2215 name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
2216 scheme => FEATURE_M12N10_REC,
2217 }->{$attr_ln};
2218
## For the four interrelated attributes, |$checker| is set to the
## plain true value 1 rather than to a code reference: the attribute
## counts as known, but nothing is called for it in this loop (see the
## |if ref $checker| test below).  The attribute node is remembered
## for the checks that follow the loop.
2219 if ($attr_ln eq 'content') {
2220 $content_attr = $attr;
2221 $checker = 1;
2222 } elsif ($attr_ln eq 'name') {
2223 $name_attr = $attr;
2224 $checker = 1;
2225 } elsif ($attr_ln eq 'http-equiv') {
2226 $http_equiv_attr = $attr;
2227 $checker = 1;
2228 } elsif ($attr_ln eq 'charset') {
2229 $charset_attr = $attr;
2230 $checker = 1;
2231 } elsif ($attr_ln eq 'scheme') {
2232 ## NOTE: <http://suika.fam.cx/2007/html/standards#html-meta-scheme>
2233 $checker = sub {};
2234 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
2235 $attr_ln !~ /[A-Z]/) {
2236 $checker = $HTMLDatasetAttrChecker;
2237 $status = $HTMLDatasetAttrStatus;
2238 } else {
2239 $checker = $HTMLAttrChecker->{$attr_ln}
2240 || $AttrChecker->{$attr_ns}->{$attr_ln}
2241 || $AttrChecker->{$attr_ns}->{''};
2242 }
2243 } else {
## Attribute in a non-null namespace: use the per-namespace tables.
2244 $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2245 || $AttrChecker->{$attr_ns}->{''};
2246 $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
2247 || $AttrStatus->{$attr_ns}->{''};
2248 $status = FEATURE_ALLOWED if not defined $status;
2249 }
2250
## Run the checker if it is a code reference.  A null-namespace
## attribute with neither checker nor status gets no error here;
## anything else without a checker is reported as unknown.
2251 if ($checker) {
2252 $checker->($self, $attr, $item, $element_state) if ref $checker;
2253 } elsif ($attr_ns eq '' and not $status) {
2254 #
2255 } else {
2256 $self->{onerror}->(node => $attr,
2257 type => 'unknown attribute',
2258 level => $self->{level}->{uncertain});
2259 ## ISSUE: No conformance criteria for unknown attributes in the spec
2260 }
2261
2262 $self->_attr_status_info ($attr, $status);
2263 }
2264
## -- Step 2: check the combination of |name|, |http-equiv|,
## |charset| and |content|.  The branches are tried in that order of
## precedence: |name| first, then |http-equiv|, then |charset|.
2265 if (defined $name_attr) {
2266 if (defined $http_equiv_attr) {
2267 $self->{onerror}->(node => $http_equiv_attr,
2268 type => 'attribute not allowed',
2269 level => $self->{level}->{must});
2270 } elsif (defined $charset_attr) {
2271 $self->{onerror}->(node => $charset_attr,
2272 type => 'attribute not allowed',
2273 level => $self->{level}->{must});
2274 }
## NOTE(review): |$metadata_name| and |$metadata_value| are assigned
## but not read anywhere in this block -- presumably reserved for the
## "metadata conformance" TODO below.
2275 my $metadata_name = $name_attr->value;
2276 my $metadata_value;
2277 if (defined $content_attr) {
2278 $metadata_value = $content_attr->value;
2279 } else {
2280 $self->{onerror}->(node => $item->{node},
2281 type => 'attribute missing',
2282 text => 'content',
2283 level => $self->{level}->{must});
2284 $metadata_value = '';
2285 }
2286 } elsif (defined $http_equiv_attr) {
2287 if (defined $charset_attr) {
2288 $self->{onerror}->(node => $charset_attr,
2289 type => 'attribute not allowed',
2290 level => $self->{level}->{must});
2291 }
2292 unless (defined $content_attr) {
2293 $self->{onerror}->(node => $item->{node},
2294 type => 'attribute missing',
2295 text => 'content',
2296 level => $self->{level}->{must});
2297 }
2298 } elsif (defined $charset_attr) {
2299 if (defined $content_attr) {
2300 $self->{onerror}->(node => $content_attr,
2301 type => 'attribute not allowed',
2302 level => $self->{level}->{must});
2303 }
2304 } else {
## None of |name|, |http-equiv| and |charset| is present.
2305 if (defined $content_attr) {
2306 $self->{onerror}->(node => $content_attr,
2307 type => 'attribute not allowed',
2308 level => $self->{level}->{must});
2309 $self->{onerror}->(node => $item->{node},
2310 type => 'attribute missing:name|http-equiv',
2311 level => $self->{level}->{must});
2312 } else {
2313 $self->{onerror}->(node => $item->{node},
2314 type => 'attribute missing:name|http-equiv|charset',
2315 level => $self->{level}->{must});
2316 }
2317 }
2318
## Closure: reports 'element not allowed:meta charset' unless this
## |meta| is the first child element of the element returned by the
## owner document's |manakai_head|.  Only the first element child of
## the parent is examined (the loop ends with |last|).
2319 my $check_charset_decl = sub () {
2320 my $parent = $item->{node}->manakai_parent_element;
2321 if ($parent and $parent eq $parent->owner_document->manakai_head) {
2322 for my $el (@{$parent->child_nodes}) {
2323 next unless $el->node_type == 1; # ELEMENT_NODE
2324 unless ($el eq $item->{node}) {
2325 ## NOTE: Not the first child element.
2326 $self->{onerror}->(node => $item->{node},
2327 type => 'element not allowed:meta charset',
2328 level => $self->{level}->{must});
2329 }
2330 last;
2331 ## NOTE: Entity references are not supported.
2332 }
2333 } else {
2334 $self->{onerror}->(node => $item->{node},
2335 type => 'element not allowed:meta charset',
2336 level => $self->{level}->{must});
2337 }
2338 }; # $check_charset_decl
2339
## Closure taking the attribute node that carries the charset name and
## the charset name itself.  It runs |$HTMLCharsetChecker| on the
## name, compares the result with the document's |input_encoding|
## (when that is defined), and reports an error if the attribute has
## the |manakai_has_reference| user data set.
2340 my $check_charset = sub ($$) {
2341 my ($attr, $charset_value) = @_;
2342
2343 my $charset;
2344 ($charset, $charset_value)
2345 = $HTMLCharsetChecker->($charset_value, $self, $attr);
2346
2347 my $ic = $item->{node}->owner_document->input_encoding;
2348 if (defined $ic) {
2349 ## TODO: Test for this case
2350 my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
2351 if ($charset ne $ic_charset) {
2352 $self->{onerror}->(node => $attr,
2353 type => 'mismatched charset name',
2354 text => $ic,
2355 value => $charset_value,
2356 level => $self->{level}->{must});
2357 }
2358 } else {
2359 ## NOTE: MUST, but not checkable, since the document is not originally
2360 ## in serialized form (or the parser does not preserve the input
2361 ## encoding information).
2362 $self->{onerror}->(node => $attr,
2363 type => 'mismatched charset name not checked',
2364 value => $charset_value,
2365 level => $self->{level}->{uncertain});
2366 }
2367
2368 if ($attr->get_user_data ('manakai_has_reference')) {
2369 $self->{onerror}->(node => $attr,
2370 type => 'charref in charset',
2371 level => $self->{level}->{must},
2372 layer => 'syntax');
2373 }
2374 }; # $check_charset
2375
2376 ## TODO: metadata conformance
2377
## -- Step 3: pragma checks.
2378 ## -- The |http-equiv| attribute (pragmas)
2379 if (defined $http_equiv_attr) { ## An enumerated attribute
2380 my $keyword = $http_equiv_attr->value;
2381 $keyword =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
2382
## Each pragma keyword may be used only once; the keywords seen so
## far are recorded in |$self->{has_http_equiv}|.
2383 if ($self->{has_http_equiv}->{$keyword}) {
2384 $self->{onerror}->(type => 'duplicate http-equiv', value => $keyword,
2385 node => $http_equiv_attr,
2386 level => $self->{level}->{must});
2387 } else {
2388 $self->{has_http_equiv}->{$keyword} = 1;
2389 }
2390
2391 if ($keyword eq 'content-type') {
2392 ## TODO: refs in "text/html; charset=" are not disallowed since rev.1275.
2393
2394 $check_charset_decl->();
2395
2396 unless ($item->{node}->owner_document->manakai_is_html) {
2397 $self->{onerror}->(node => $item->{node},
2398 type => 'in XML:charset',
2399 level => $self->{level}->{must});
2400 }
2401
## The value must be "text/html;" (ASCII case-insensitive), optional
## whitespace, "charset=" and a non-empty charset name, which is then
## handed to |$check_charset|.
2402 if ($content_attr) {
2403 my $content = $content_attr->value;
2404 if ($content =~ m!^[Tt][Ee][Xx][Tt]/[Hh][Tt][Mm][Ll];
2405 [\x09\x0A\x0C\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2406 =(.+)\z!sx) {
2407 $check_charset->($content_attr, $1);
2408 } else {
2409 $self->{onerror}->(node => $content_attr,
2410 type => 'meta content-type syntax error',
2411 level => $self->{level}->{must});
2412 }
2413 }
2414 } elsif ($keyword eq 'default-style') {
2415 ## XXX no author requirement in the spec
2416
2417 } elsif ($keyword eq 'refresh') {
## The value is either digits only, or digits, ";", whitespace,
## "url=" and a URL.  In the latter case the substitution below strips
## the prefix, so |$content| holds just the URL part afterwards.
2418 if ($content_attr) {
2419 my $content = $content_attr->value;
2420 if ($content =~ /\A[0-9]+\z/) {
2421 ## NOTE: Valid non-negative integer.
2422 #
2423 } elsif ($content =~ s/\A[0-9]+;[\x09\x0A\x0C\x0D\x20]+[Uu][Rr][Ll]=//) {
2424 ## XXXURL
2425 Whatpm::URIChecker->check_iri_reference ($content, sub {
2426 $self->{onerror}->(value => $content, @_, node => $content_attr);
2427 }, $self->{level});
2428 $self->{has_uri_attr} = 1; ## NOTE: One of "attributes with URLs".
2429
2430 $element_state->{uri_info}->{content}->{node} = $content_attr;
2431 $element_state->{uri_info}->{content}->{type}->{hyperlink} = 1;
2432 ## XXXTODO: absolute
2433 push @{$self->{return}->{uri}->{$content} ||= []},
2434 $element_state->{uri_info}->{content};
2435 } else {
2436 $self->{onerror}->(node => $content_attr,
2437 type => 'refresh:syntax error',
2438 level => $self->{level}->{must});
2439 }
2440 }
2441 } elsif ($keyword eq 'content-language') {
2442 if ($content_attr) {
2443 my $content = $content_attr->value;
2444 require Whatpm::LangTag;
2445 ## XXX In fact what the spec requires is "BCP 47 language code".
2446 Whatpm::LangTag->check_rfc3066_language_tag ($content, sub {
2447 $self->{onerror}->(@_, node => $content_attr);
2448 }, $self->{level});
2449 }
2450
2451 ## XXX This is conforming but obsolete.
2452 } else {
2453 ## NOTE: |Content-Style-Type| and |Content-Script-Type|
2454 ## pragmas are listed in the table of the spec in the
2455 ## commented-out form, but there is no author requirement
2456 ## (even commented-out one isn't there).
2457
2458 ## NOTE: Pragma extensions are listed in
2459 ## <http://wiki.whatwg.org/wiki/PragmaExtensions>. At the
2460 ## time of writing, no extension has been registered yet.
2461
2462 $self->{onerror}->(node => $http_equiv_attr,
2463 type => 'enumerated:invalid',
2464 level => $self->{level}->{must});
2465 }
2466 }
2467
## -- Step 4: the |charset| attribute.  In a document that is not
## HTML (per |manakai_is_html|), any value other than "UTF-8" (ASCII
## case-insensitive) is reported.
2468 if (defined $charset_attr) {
2469 my $value = $charset_attr->value;
2470
2471 $check_charset_decl->();
2472 $check_charset->($charset_attr, $value);
2473
2474 if (not $item->{node}->owner_document->manakai_is_html and
2475 not $value =~ /\A[Uu][Tt][Ff]-8\z/) {
2476 $self->{onerror}->(node => $item->{node},
2477 type => 'in XML:charset',
2478 level => $self->{level}->{must});
2479 }
2480 }
2481 },
2482 };
2483
## The |style| element.  The styling language is taken from |type|
## (|text/css| when the attribute is absent).  Child elements are
## rejected for |text/css| and allowed otherwise; the text content is
## accumulated and passed to |onsubdoc| at the end of the element,
## except when |type| is invalid or names an XML-based type.
$Element->{$HTML_NS}{style} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style| elements, but
    ## it is syntactically no different.
  }, {
    %HTMLAttrStatus,
    %XHTML2CommonAttrStatus,
    dir => FEATURE_HTML5_REC,
    disabled => FEATURE_XHTML2_ED,
    href => FEATURE_RDFA_REC | FEATURE_XHTML2_ED,
    id => FEATURE_HTML5_REC,
    lang => FEATURE_HTML5_REC,
    media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_FD,
    title => FEATURE_HTML5_REC,
    type => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  }),

  ## Normalises |type| to a lowercase "type/subtype" string (or undef
  ## if it does not have exactly that form), decides whether child
  ## elements are allowed, and resets the text accumulator.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on its
    ## content model.
    my $style_type = $item->{node}->get_attribute_ns (undef, 'type');
    $style_type = 'text/css' if not defined $style_type;
    if ($style_type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      ## NOTE: ASCII case-insensitive.
      ($style_type = "$1/$2") =~ tr/A-Z/a-z/;
    } else {
      ## NOTE: We don't know how parameters are handled by UAs.
      ## According to the HTML5 specification, a <style> with unknown
      ## parameters in |type=""| must be ignored.
      undef $style_type;
    }

    ## Child elements are disallowed only for |text/css|; an invalid
    ## or unknown type allows them.
    ## NOTE: There is no definition for "XML-based styling language"
    ## in HTML5, so |+xml| / |/xml| types get no special treatment
    ## here.
    $element_state->{allow_element}
        = (defined $style_type and $style_type eq 'text/css') ? 0 : 1;
    $element_state->{style_type} = $style_type;

    for my $attr_name ('template', 'ref') {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }

    $element_state->{text} = '';
  },

  ## Child elements are accepted only if |check_start| allowed them
  ## (or the element is in the "plus" set).
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $excluded = ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln));
    if ($excluded) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln} or
             $element_state->{allow_element}) {
      ## Allowed; nothing to report.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  },

  ## Collects the style sheet source text.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $child_node->data;
  },

  ## Hands the collected text over as a subdocument, depending on the
  ## normalised type, then delegates to the generic HTML |check_end|.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    my $style_type = $element_state->{style_type};

    if (not defined $style_type) {
      ## NOTE: Invalid type=""; nothing is checked.
    } elsif ($style_type ne 'text/css' and
             $style_type =~ m![+/][Xx][Mm][Ll]\z!) {
      ## NOTE: XML content should be checked by THIS instance of the
      ## checker as part of normal tree validation.  However, we don't
      ## know of any XML-based styling language that can be used in
      ## the HTML <style> element, so a "style language not
      ## supported" error is thrown.
      $self->{onerror}->(node => $item->{node},
                         type => 'XML style lang',
                         text => $element_state->{style_type},
                         level => $self->{level}->{uncertain});
    } else {
      ## |text/css| and every other non-XML type.  For |text/css| the
      ## media type passed is the same string the original literal
      ## spelled out.
      ## NOTE: Should we raise some kind of error for,
      ## say, <style type="text/plaion">?
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => $style_type,
                           is_char_string => 1});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
2589 ## ISSUE: Relationship to significant content check?
2590
## The |body| element: flow content, plus the presentational color /
## background attributes and the event handler attributes listed
## below.
$Element->{$HTML_NS}{body} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    alink => $HTMLColorAttrChecker,
    background => $HTMLURIAttrChecker,
    bgcolor => $HTMLColorAttrChecker,
    link => $HTMLColorAttrChecker,
    ## Every event handler attribute uses the same checker.
    (map { ($_ => $HTMLEventHandlerAttrChecker) } qw(
      onafterprint onbeforeprint onbeforeunload onblur onerror onfocus
      onhashchange onload onmessage onoffline ononline onpopstate
      onredo onresize onstorage onundo onunload
    )),
    text => $HTMLColorAttrChecker,
    vlink => $HTMLColorAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    alink => FEATURE_M12N10_REC_DEPRECATED,
    background => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_REC,
    link => FEATURE_M12N10_REC_DEPRECATED,
    ## Event handler attributes whose only status is HTML5 LC.
    (map { ($_ => FEATURE_HTML5_LC) } qw(
      onafterprint onbeforeprint onbeforeunload onblur onerror onfocus
      onhashchange onmessage onoffline ononline onpopstate
      onredo onresize onstorage onundo
    )),
    ## These two carry an additional M12N10_REC status.
    onload => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    onunload => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    text => FEATURE_M12N10_REC_DEPRECATED,
    vlink => FEATURE_M12N10_REC_DEPRECATED,
  }),

  ## Marks |background| as an embedded URL and |template| / |ref| as
  ## resource URLs.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}{background}{type}{embedded} = 1;
    for my $attr_name ('template', 'ref') {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }
  },
};
2654
## The |section| element: flow content; |cite| is checked as a URL.
$Element->{$HTML_NS}{section} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %XHTML2CommonAttrStatus,
      cite => FEATURE_HTML5_DROPPED | FEATURE_XHTML2_ED,
    },
  ),
};
2666
## The |nav| element: flow content with no element-specific checks.
## The |status| pair is listed before the spread of
## |%HTMLFlowContentChecker|, as in the original definition.
$Element->{$HTML_NS}{nav} = {
  (status => FEATURE_HTML5_LC),
  %HTMLFlowContentChecker,
};
2671
## The |article| element: flow content; |cite| is checked as a URL and
## |pubdate| by the date-time checker created for
## 'global_date_and_time_string'.
$Element->{$HTML_NS}{article} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      pubdate => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      cite => FEATURE_HTML5_DROPPED,
      pubdate => FEATURE_HTML5_LC,
    },
  ),
}; # article
2684
## The |blockquote| element: flow content; |cite| is checked as a URL
## and recorded with the "cite" URL type.
$Element->{$HTML_NS}{blockquote} = {
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLFlowContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_HTML2X_RFC,
      cite => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),

  ## Classifies the URL-valued attributes of the element.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{uri_info}{cite}{type}{cite} = 1;
    for my $attr_name ('template', 'ref') {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }
  },
};
2706
## The |aside| element: flow content with no element-specific checks.
## The |status| pair is listed before the spread of
## |%HTMLFlowContentChecker|, as in the original definition.
$Element->{$HTML_NS}{aside} = {
  (status => FEATURE_HTML5_LC),
  %HTMLFlowContentChecker,
};
2711
## The |h1| element: phrasing content.  |align| is an enumerated
## attribute.  Starting an |h1| sets |$self->{flag}->{has_hn}|.
$Element->{$HTML_NS}{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    align => $GetHTMLEnumeratedAttrChecker->(
      +{map { ($_ => 1) } qw(left center right justify)},
    ),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
  }),

  ## Records that a heading has been seen and classifies the
  ## |template| / |ref| URLs as resources.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->{flag}{has_hn} = 1;

    for my $attr_name ('template', 'ref') {
      $element_state->{uri_info}{$attr_name}{type}{resource} = 1;
    }
  },
};
2734
## |h2| to |h6| each get their own shallow copy of the |h1|
## definition.
for my $heading_level (2 .. 6) {
  $Element->{$HTML_NS}{"h$heading_level"} = {%{$Element->{$HTML_NS}{h1}}};
}
2744
2745 ## TODO: Explicit sectioning is "encouraged".
2746
## |hgroup|: only |h1|-|h6| children (and "plus" elements) are accepted,
## significant text is rejected, and at least one heading is required.
$Element->{$HTML_NS}{hgroup} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state, $element_state2) = @_;

    ## Report a "must"-level error of the given type on the child.
    my $report = sub {
      $self->{onerror}->(node => $child_el, type => $_[0],
                         level => $self->{level}->{must});
    };
    my $is_hn = ($child_nsuri eq $HTML_NS and $child_ln =~ /\Ah[1-6]\z/);

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $report->('element not allowed:minus');
      ## The excluded heading is still recorded, so that no additional
      ## "missing heading" error is raised at the end of the element.
      $element_state2->{has_hn} = 1 if $is_hn;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor; nothing to do.
    } elsif ($is_hn) {
      ## NOTE: Use $element_state2 instead of $element_state here so
      ## that the |h2| element in |<hgroup><ins><h2>| is not counted
      ## as an |h2| of the |hgroup| element.
      $element_state2->{has_hn} = 1;
    } else {
      $report->('element not allowed');
    }
  }, # check_child_element
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only insignificant (whitespace) text is tolerated.
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  }, # check_child_text
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'element missing:hn',
                       level => $self->{level}->{must})
        if not $element_state->{has_hn};

    $HTMLChecker{check_end}->(@_);
  }, # check_end
}; # hgroup
2791
## |header|: flow content in which |header| and |footer| are excluded.
## A warning is raised when no heading element occurs inside it.
$Element->{$HTML_NS}{header} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements
        ($element_state, {$HTML_NS => {header => 1, footer => 1}});

    ## Remember the enclosing |has_hn| flag and reset it, such that only
    ## headings seen inside this element are counted in |check_end|.
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;

    $element_state->{uri_info}{$_}{type}{resource} = 1 for qw/template ref/;
  }, # check_start
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    if (not $self->{flag}->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn',
                         level => $self->{level}->{warn});
    }

    ## Merge the saved outer flag back in.
    $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};

    $HTMLFlowContentChecker{check_end}->(@_);
  }, # check_end
}; # header
2818
## |footer|: flow content in which |header|, |footer|, sectioning content
## and heading content are excluded.
$Element->{$HTML_NS}{footer} = {
  status => FEATURE_HTML5_LC,
  %HTMLFlowContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my @excluded = (
      {$HTML_NS => {header => 1, footer => 1}},
      $HTMLSectioningContent,
      $HTMLHeadingContent,
    );
    $self->_add_minus_elements ($element_state, @excluded);

    $element_state->{uri_info}{$_}{type}{resource} = 1 for qw/template ref/;
  }, # check_start
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusions registered in |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  }, # check_end
}; # footer
2839
## |address|: flow content in which |header|, |footer|, |address|,
## sectioning content and heading content are excluded.
$Element->{$HTML_NS}{address} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Element-specific attribute checkers
    {
      ## TODO: add test
      #align => $GetHTMLEnumeratedAttrChecker->({
      #  left => 1, center => 1, right => 1, justify => 1,
      #}),
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align   => FEATURE_HTML2X_RFC,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    my @excluded = (
      {$HTML_NS => {header => 1, footer => 1, address => 1}},
      $HTMLSectioningContent,
      $HTMLHeadingContent,
    );
    $self->_add_minus_elements ($element_state, @excluded);

    $element_state->{uri_info}{$_}{type}{resource} = 1 for qw/template ref/;
  }, # check_start
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Undo the exclusions registered in |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLFlowContentChecker{check_end}->(@_);
  }, # check_end
}; # address
2873
## |p|: uses the phrasing content checker; |align| is an enumerated
## attribute.
$Element->{$HTML_NS}{p} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Element-specific attribute checkers
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        left    => 1,
        center  => 1,
        right   => 1,
        justify => 1,
      }),
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align   => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # p
2889
## |hr|: uses the empty element checker.  No element-specific attribute
## checkers are defined yet; only attribute statuses are listed.
$Element->{$HTML_NS}{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |align|, |noshade|, |size|, |width|
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_REC,
      noshade => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      size    => FEATURE_M12N10_REC_DEPRECATED,
      width   => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
}; # hr
2906
## |br|: uses the empty element checker; |clear| is an enumerated
## attribute.
$Element->{$HTML_NS}{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Element-specific attribute checkers
    {
      clear => $GetHTMLEnumeratedAttrChecker->({
        left  => 1,
        all   => 1,
        right => 1,
        none  => 1,
      }),
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      class   => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      clear   => FEATURE_M12N10_REC_DEPRECATED,
      id      => FEATURE_HTML5_REC,
      sdapref => FEATURE_HTML20_RFC,
      style   => FEATURE_HTML5_REC,
      title   => FEATURE_HTML5_REC,
    },
  ),
  ## NOTE: Blank line MUST NOT be used for presentation purpose.
  ## (This requirement is semantic so that we cannot check.)
}; # br
2926
## |dialog|: children must strictly alternate |dt|, |dd|, |dt|, |dd|, ...
## The |phase| state names the element expected next ("before dt" or
## "before dd").
$Element->{$HTML_NS}{dialog} = {
  status => FEATURE_HTML5_WD,
  %HTMLChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';

    $element_state->{uri_info}{$_}{type}{resource} = 1 for qw/template ref/;
  }, # check_start
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor; nothing to do.
    } else {
      ## Which element is expected in the current phase?
      my %expected_in = ('before dt' => 'dt', 'before dd' => 'dd');
      my $expected = $expected_in{$element_state->{phase}};
      die "check_child_element: Bad |dialog| phase: $element_state->{phase}"
          unless defined $expected;
      my $other = $expected eq 'dt' ? 'dd' : 'dt';

      my $is_html = ($child_nsuri eq $HTML_NS);
      if ($is_html and $child_ln eq $expected) {
        $element_state->{phase} = "before $other";
      } elsif ($is_html and $child_ln eq $other) {
        ## The partner element is missing; report it and keep waiting
        ## for the same kind of element.
        $self->{onerror}->(node => $child_el, type => 'ps element missing',
                           text => $expected,
                           level => $self->{level}->{must});
        $element_state->{phase} = "before $expected";
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  }, # check_child_element
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  }, # check_child_text
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |dt| without its |dd|.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'dd',
                       level => $self->{level}->{must})
        if $element_state->{phase} eq 'before dd';

    $HTMLChecker{check_end}->(@_);
  }, # check_end
}; # dialog
2996
## |pre|: uses the phrasing content checker.  When the element carries
## the class |idl| or |idl-code|, its text content is additionally
## handed to the |onsubdoc| handler as a |text/x-webidl| subdocument.
$Element->{$HTML_NS}->{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## TODO: Flag to enable/disable IDL checking?
    my $class = $item->{node}->get_attribute_ns (undef, 'class');

    ## The |class| attribute is a set of space-separated tokens.  The
    ## value is undefined when the attribute is absent, so it has to be
    ## guarded before use.  Tokens are compared exactly, such that class
    ## names which merely contain "idl" (e.g. |non-idl|, |idl-foo|) do
    ## not trigger IDL checking.
    my $is_idl = 0;
    if (defined $class) {
      for my $token (split /[\x09\x0A\x0C\x0D\x20]+/, $class) {
        if ($token eq 'idl' or $token eq 'idl-code') {
          $is_idl = 1;
          last;
        }
      }
    }

    if ($is_idl) {
      ## NOTE: pre.idl: WHATWG, XHR, Selectors API, CSSOM specs
      ## NOTE: pre.code > code.idl-code: WebIDL spec
      ## NOTE: pre.idl-code: DOM1 spec
      ## NOTE: div.idl-code > pre: DOM, ProgressEvent specs
      ## NOTE: pre.schema: ReSpec-generated specs
      $self->{onsubdoc}->({s => $item->{node}->text_content,
                           container_node => $item->{node},
                           media_type => 'text/x-webidl',
                           is_char_string => 1});
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  }, # check_end
}; # pre
3029
## |ol|: only |li| children (and "plus" elements) are accepted and
## significant text is rejected.  |ul| and |dir| are derived from this
## definition.
$Element->{$HTML_NS}{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Element-specific attribute checkers
    {
      compact  => $GetHTMLBooleanAttrChecker->('compact'),
      reversed => $GetHTMLBooleanAttrChecker->('reversed'),
      start    => $HTMLIntegerAttrChecker,
      ## TODO: HTML4 |type|
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align    => FEATURE_HTML2X_RFC,
      compact  => FEATURE_M12N10_REC_DEPRECATED,
      lang     => FEATURE_HTML5_REC,
      reversed => FEATURE_HTML5_WD,
      sdaform  => FEATURE_HTML20_RFC,
      #start   => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
      start    => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      type     => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln} and
             not ($child_nsuri eq $HTML_NS and $child_ln eq 'li')) {
      ## Neither explicitly allowed by an ancestor nor an HTML |li|.
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  }, # check_child_element
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  }, # check_child_text
}; # ol
3075
## |ul|: a copy of the |ol| definition with its own status and
## attribute checker.
$Element->{$HTML_NS}{ul} = {
  %{$Element->{$HTML_NS}{ol}},
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Element-specific attribute checkers
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
      ## TODO: HTML4 |type|
      ## TODO: sdaform, align
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align   => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
      type    => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
}; # ul
3093
## |dir|: a copy of the |ul| definition with its own status and
## attribute checker.
$Element->{$HTML_NS}{dir} = {
  ## TODO: %block; is not allowed [HTML4] ## TODO: Empty list allowed?
  %{$Element->{$HTML_NS}{ul}},
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Element-specific attribute checkers
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align   => FEATURE_HTML2X_RFC,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
    },
  ),
}; # dir
3110
## |li|: uses the flow content checker.  The |value| attribute is
## checked as an integer and is reported when the parent element is not
## an HTML |ol|.
$Element->{$HTML_NS}{li} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Element-specific attribute checkers
    {
      ## TODO: HTML4 |type|
      value => sub {
        my ($self, $attr) = @_;

        ## Is the owner element a child of an HTML |ol| element?
        my $parent_is_ol;
        my $parent_el = $attr->owner_element->manakai_parent_element;
        if (defined $parent_el) {
          my $ns = $parent_el->namespace_uri;
          $ns = '' unless defined $ns;
          my $ln = $parent_el->manakai_local_name;
          $parent_is_ol = ($ns eq $HTML_NS and $ln eq 'ol');
        }

        if (not $parent_is_ol) {
          ## ISSUE: No "MUST" in the spec.
          $self->{onerror}->(node => $attr,
                             type => 'non-ol li value',
                             level => $self->{level}->{html5_fact});
        }

        $HTMLIntegerAttrChecker->($self, $attr);
      }, # value
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align   => FEATURE_HTML2X_RFC,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
      type    => FEATURE_M12N10_REC_DEPRECATED,
      #value  => FEATURE_HTML5_LC | FEATURE_XHTMLBASIC11_CR |
      #    FEATURE_M12N10_REC_DEPRECATED,
      value   => FEATURE_HTML5_LC | FEATURE_XHTML2_ED |
          FEATURE_XHTMLBASIC11_CR | FEATURE_M12N10_REC,
    },
  ), # check_attrs
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    ## XXXTODO: In <dir> element, delegate to
    ## %HTMLPhrasingContentChecker instead.
    $HTMLFlowContentChecker{check_child_element}->(@_);
  }, # check_child_element
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## XXXTODO: In <dir> element, delegate to
    ## %HTMLPhrasingContentChecker instead.
    $HTMLFlowContentChecker{check_child_text}->(@_);
  }, # check_child_text
}; # li
3169
## |dl|: children are groups of one or more |dt| followed by one or more
## |dd|.  The |phase| state is "before dt" initially, "in dts" after a
## |dt| and "in dds" after a |dd|.
$Element->{$HTML_NS}{dl} = {
  %HTMLChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Element-specific attribute checkers
    {
      compact => $GetHTMLBooleanAttrChecker->('compact'),
    },
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      compact => FEATURE_M12N10_REC_DEPRECATED,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
      type    => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';

    $element_state->{uri_info}{$_}{type}{resource} = 1 for qw/template ref/;
  }, # check_start
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed by an ancestor; nothing to do.
    } else {
      my $phase = $element_state->{phase};
      die "check_child_element: Bad |dl| phase: $element_state->{phase}"
          unless $phase eq 'in dds' or $phase eq 'in dts' or
                 $phase eq 'before dt';

      ## Local name of the child if it is an HTML element, or the empty
      ## string otherwise.
      my $html_ln = $child_nsuri eq $HTML_NS ? $child_ln : '';
      if ($html_ln eq 'dt') {
        ## A |dt| is acceptable in every phase.
        $element_state->{phase} = 'in dts';
      } elsif ($html_ln eq 'dd') {
        ## A |dd| needs a preceding |dt|.
        if ($phase eq 'before dt') {
          $self->{onerror}->(node => $child_el, type => 'ps element missing',
                             text => 'dt',
                             level => $self->{level}->{must});
        }
        $element_state->{phase} = 'in dds';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  }, # check_child_element
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  }, # check_child_text
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Trailing |dt| elements without any |dd|.
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'dd',
                       level => $self->{level}->{must})
        if $element_state->{phase} eq 'in dts';

    $HTMLChecker{check_end}->(@_);
  }, # check_end
}; # dl
3255
## |dt|: uses the phrasing content checker; no element-specific
## attribute checkers.
$Element->{$HTML_NS}{dt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # dt
3266
## |dd|: uses the flow content checker; no element-specific attribute
## checkers.
$Element->{$HTML_NS}{dd} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # dd
3277
## |a|: uses the transparent content checker, with interactive content
## excluded from its descendants.
##
## |check_attrs| walks every attribute of the element, selects a checker
## and a status for it, runs the checker and reports the status.  When
## the element has no |href| attribute, the attributes |target|, |ping|,
## |rel|, |media|, |hreflang| and |type| are reported as not allowed.
## The |in_a_href| flag is set while inside an |a| element with |href|
## and is restored in |check_end|.
$Element->{$HTML_NS}->{a} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status and checker tables for null-namespace attributes.  They
    ## do not depend on the attribute being examined, so they are built
    ## once per element rather than once per attribute.
    my $status_of = {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      cryptopts => FEATURE_RFC2659,
      dn => FEATURE_RFC2659,
      href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_WD | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      media => FEATURE_HTML5_WD | FEATURE_XHTML2_ED,
      methods => FEATURE_HTML20_RFC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      nonce => FEATURE_RFC2659,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ping => FEATURE_HTML5_WD,
      rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_XHTML2_ED |
          FEATURE_M12N10_REC,
      rev => FEATURE_RDFA_REC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      sdapref => FEATURE_HTML20_RFC,
      shape => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      urn => FEATURE_HTML20_RFC,
    };
    my $checker_of = {
      charset => sub {
        my ($self, $attr) = @_;
        $HTMLCharsetChecker->($attr->value, @_);
      },
      ## TODO: HTML4 |coords|
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      rev => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
      ## TODO: HTML4 |shape|
      media => $HTMLMQAttrChecker,
      ## TODO: HTML4/XHTML1 |name|
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    ## Attribute nodes that have an |a|-specific checker, by local name.
    my %attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      my $status;
      if ($attr_ns eq '') {
        $status = $status_of->{$attr_ln};
        $checker = $checker_of->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $attr_ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }

      ## Fall back to the per-namespace tables, then to the namespace's
      ## default entry.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
          || $AttrStatus->{$attr_ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($attr_ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    ## Save the enclosing flag such that |check_end| can restore it.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## These attributes are reported when |href| is absent.
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed',
                             level => $self->{level}->{must});
        }
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  }, # check_attrs
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  }, # check_start
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## Clear the flag unless it was already set outside this element.
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLTransparentChecker{check_end}->(@_);
  }, # check_end
}; # a
3400
## |q|: uses the phrasing content checker; |cite| is checked as a URI.
$Element->{$HTML_NS}{q} = {
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Element-specific attribute checkers
    {cite => $HTMLURIAttrChecker},
    ## Attribute statuses
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      cite    => FEATURE_HTML5_AT_RISK | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang    => FEATURE_HTML5_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    },
  ),
  check_start => sub {
    my (undef, undef, $state) = @_;

    ## Flag the URI type of |cite| as "cite" and of |template| / |ref|
    ## as "resource".
    $state->{uri_info}{cite}{type}{cite} = 1;
    $state->{uri_info}{$_}{type}{resource} = 1 for qw/template ref/;
  }, # check_start
}; # q
## TODO: "Quotation punctuation (such as quotation marks), if any, must be
## placed inside the <code>q</code> element."  Though we cannot test the
## element against this requirement, since it includes a semantic bit,
## it might be possible to report the existence of quotation marks OUTSIDE
## the |q| element.
3427
## |cite|: uses the phrasing content checker; no element-specific
## attribute checkers.
$Element->{$HTML_NS}{cite} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # cite
3438
## |em|: uses the phrasing content checker; no element-specific
## attribute checkers.
$Element->{$HTML_NS}{em} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # em
3449
## |strong|: uses the phrasing content checker; no element-specific
## attribute checkers.
$Element->{$HTML_NS}{strong} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang    => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # strong
3460
## |small|: uses the phrasing content checker; no element-specific
## attribute checkers.
$Element->{$HTML_NS}{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
}; # small
3470
## |big|: uses the phrasing content checker; no element-specific
## attribute checkers.
$Element->{$HTML_NS}{big} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
}; # big
3480
## |mark|: uses the phrasing content checker as is; only the status is
## set.
$Element->{$HTML_NS}{mark} = {
  (status => FEATURE_HTML5_WD),
  %HTMLPhrasingContentChecker,
}; # mark
3485
## |dfn|: uses the phrasing content checker, with nested |dfn| elements
## excluded.  When the element starts, the term it defines is computed
## and the element node is appended to the list kept in
## |$self->{term}->{$term}|.
##
## The defining term is, in this order:
##
##   1. the value of the |title| attribute of the |dfn| element, if any;
##   2. the value of the |title| attribute of the child |abbr| element,
##      if that HTML |abbr| is the only element child and there is no
##      child text other than inter-element whitespace;
##   3. the text content of the |dfn| element.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      my $has_child_element;
      CHILD: for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          if ($has_child_element) {
            ## More than one element child: rule 2 does not apply.
            undef $term;
            last CHILD;
          }
          $has_child_element = 1;

          ## The only element child has to be an HTML |abbr| element
          ## with a |title| attribute.  Any other first element child
          ## disqualifies rule 2, even if an |abbr| follows it.
          my $title_attr;
          my $nsuri = $child->namespace_uri;
          if (defined $nsuri and $nsuri eq $HTML_NS and
              $child->manakai_local_name eq 'abbr') {
            $title_attr = $child->get_attribute_node_ns (undef, 'title');
          }
          last CHILD unless $title_attr;
          $term = $title_attr->value;
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next CHILD
              if $child->data =~ /\A[\x09\x0A\x0C\x0D\x20]+\z/; # Inter-element whitespace
          ## Significant text: rule 2 does not apply.
          undef $term;
          last CHILD;
        }
      }
      $term = $node->text_content unless defined $term;
    }

    ## Register this element as a definition of the term.
    push @{$self->{term}->{$term}}, $node;
    ## ISSUE: The HTML5 definition for the defined term does not work with
    ## |ruby| unless |dfn| has |title|.

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  }, # check_start
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  }, # check_end
}; # dfn
3546
## |abbr|: uses the phrasing content checker; no element-specific
## attribute checkers.
$Element->{$HTML_NS}{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      full => FEATURE_XHTML2_ED,
      lang => FEATURE_HTML5_REC,
    },
  ),
  ## NOTE: "If an abbreviation is pluralised, the expansion's grammatical
  ## number (plural vs singular) must match the grammatical number of the
  ## contents of the element."  Though this can be checked by machine,
  ## it requires language-specific knowledge and dictionary, such that
  ## we don't support the check of the requirement.
  ## ISSUE: Is <abbr title="Cascading Style Sheets">CSS</abbr> conforming?
}; # abbr
3563
## |acronym|: uses the phrasing content checker; no element-specific
## attribute checkers.
$Element->{$HTML_NS}{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
}; # acronym
3573
3574 $Element->{$HTML_NS}->{time} = {
3575 status => FEATURE_HTML5_WD,
3576 %HTMLPhrasingContentChecker,
3577 check_attrs => $GetHTMLAttrsChecker->({
3578 datetime => sub { 1 }, # checked in |checker|
3579 }, {
3580 %HTMLAttrStatus,
3581 %HTMLM12NCommonAttrStatus,
3582 datetime => FEATURE_HTML5_FD,
3583 }), # check_attrs
3584 check_start => sub {
3585 my ($self, $item, $element_state) = @_;
3586 $self->_add_minus_elements ($element_state, {$HTML_NS => {time => 1}});
3587
3588 $HTMLPhrasingContentChecker{check_start}->(@_);
3589 }, # check_start
3590 check_end => sub {
3591 my ($self, $item, $element_state) = @_;
3592 $self->_remove_minus_elements ($element_state);
3593
3594 ## XXX Maybe we should move this code out somewhere (maybe
3595 ## Message::Date) such that we can reuse this code in other places
3596 ## (e.g. HTMLTimeElement implementation).
3597
3598 ## "Vaguer moments in time" or "valid date or time string".
3599 my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
3600 my $input;
3601 my $reg_sp;
3602 my $input_node;
3603 if ($attr) {
3604 $input = $attr->value;
3605 $reg_sp = qr/[\x09\x0A\x0C\x0D\x20]/;
3606 $input_node = $attr;
3607 } else {
3608 $input = $item->{node}->text_content;
3609 $reg_sp = qr/\p{WhiteSpace}/;
3610 $input_node = $item->{node};
3611 }
3612
3613 my $hour;
3614 my $minute;
3615 my $second;
3616 if ($input =~ /
3617 \A
3618 $reg_sp*
3619 ([0-9]+) # 1
3620 (?>
3621 -([0-9]+) # 2
3622 -((?>[0-9]+)) # 3 # Use (?>) such that yyyy-mm-ddhh:mm does not match
3623 $reg_sp*
3624 (?>
3625 (?>
3626 T
3627 $reg_sp*
3628 )?
3629 ([0-9]+) # 4
3630 :([0-9]+) # 5
3631 (?>
3632 :([0-9]+(?>\.[0-9]+)?) # 6
3633 )?
3634 $reg_sp*
3635 (?>
3636 Z
3637 $reg_sp*
3638 |
3639 [+-]([0-9]+):([0-9]+) # 7, 8
3640 $reg_sp*
3641 )?
3642 )?
3643 \z
3644 |
3645 :([0-9]+) # 9
3646 (?:
3647 :([0-9]+(?>\.[0-9]+)?) # 10
3648 )?
3649 $reg_sp*
3650 \z
3651 )
3652 /x) {
3653 my $has_syntax_error;
3654 if (defined $2) { ## YYYY-MM-DD T? hh:mm
3655 if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
3656 (defined $4 and length $4 != 2) or
3657 (defined $5 and length $5 != 2)) {
3658 $self->{onerror}->(node => $input_node,
3659 type => 'dateortime:syntax error',
3660 level => $self->{level}->{must});
3661 $has_syntax_error = 1;
3662 }
3663
3664 if (1 <= $2 and $2 <= 12) {
3665 $self->{onerror}->(node => $input_node, type => 'datetime:bad day',
3666 level => $self->{level}->{must})
3667 if $3 < 1 or
3668 $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
3669 $self->{onerror}->(node => $input_node, type => 'datetime:bad day',
3670 level => $self->{level}->{must})
3671 if $2 == 2 and $3 == 29 and
3672 not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
3673 } else {
3674 $self->{onerror}->(node => $input_node,
3675 type => 'datetime:bad month',
3676 level => $self->{level}->{must});
3677 }
3678 $self->{onerror}->(node => $input_node,
3679 type => 'datetime:bad year',
3680 level => $self->{level}->{must})
3681 if $1 == 0;
3682
3683 ($hour, $minute, $second) = ($4, $5, $6);
3684
3685 if (defined $7) { ## [+-]hh:mm
3686 if (length $7 != 2 or length $8 != 2) {
3687 $self->{onerror}->(node => $input_node,
3688 type => 'dateortime:syntax error',
3689 level => $self->{level}->{must});
3690 $has_syntax_error = 1;
3691 }
3692
3693 $self->{onerror}->(node => $input_node,
3694 type => 'datetime:bad timezone hour',
3695 level => $self->{level}->{must})
3696 if $7 > 23;
3697 $self->{onerror}->(node => $input_node,
3698 type => 'datetime:bad timezone minute',
3699 level => $self->{level}->{must})
3700 if $8 > 59;
3701 }
3702 } else { ## hh:mm
3703 if (length $1 != 2 or length $9 != 2) {
3704 $self->{onerror}->(node => $input_node,
3705 type => qq'dateortime:syntax error',
3706 level => $self->{level}->{must});
3707 $has_syntax_error = 1;
3708 }
3709
3710 ($hour, $minute, $second) = ($1, $9, $10);
3711 }
3712
3713 $self->{onerror}->(node => $input_node, type => 'datetime:bad hour',
3714 level => $self->{level}->{must}) if $hour > 23;
3715 $self->{onerror}->(node => $input_node, type => 'datetime:bad minute',
3716 level => $self->{level}->{must}) if $minute > 59;
3717
3718 if (defined $second) { ## s
3719 ## NOTE: Integer part of second don't have to have length of two.
3720
3721 if (substr ($second, 0, 1) eq '.') {
3722 $self->{onerror}->(node => $input_node,
3723 type => 'dateortime:syntax error',
3724 level => $self->{level}->{must});
3725 $has_syntax_error = 1;
3726 }
3727
3728 $self->{onerror}->(node => $input_node, type => 'datetime:bad second',
3729 level => $self->{level}->{must}) if $second >= 60;
3730 }
3731
3732 unless ($has_syntax_error) {
3733 $input =~ s/\A$reg_sp+//;
3734 $input =~ s/$reg_sp+\z//;
3735 if ($input =~ /$reg_sp+/) {
3736 $self->{onerror}->(node => $input_node,
3737 type => 'dateortime:syntax error',
3738 level => $self->{level}->{must});
3739 }
3740 }
3741 } else {
3742 $self->{onerror}->(node => $input_node,
3743 type => 'dateortime:syntax error',
3744 level => $self->{level}->{must});
3745 }
3746
3747 $HTMLPhrasingContentChecker{check_end}->(@_);
3748 }, # check_end
3749 }; # time
3750
## The |meter| element: phrasing content.  All six numeric attributes
## are checked as floating-point numbers with an accept-everything
## range callback.
## TODO: "The recommended way of giving the value is to include it as contents of the element"
## TODO: value inequalities (HTML5 revision 1463)
## TODO: content checking
## TODO: content or value must contain number (rev 2053)
$Element->{$HTML_NS}{meter} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(+{
    ## One independent checker per attribute, no range restriction.
    map {
      ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }))
    } qw/value min low high max optimum/
  }, +{
    %HTMLAttrStatus,
    map { ($_ => FEATURE_HTML5_DEFAULT) } qw/high low max min optimum value/
  }), # check_attrs
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register |meter| itself as a minus element while this element is
    ## open (undone in |check_end|).
    $self->_add_minus_elements
        ($element_state, {$HTML_NS => {meter => 1}});

    $HTMLPhrasingContentChecker{check_start}->(@_);
  }, # check_start
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## XXX Work in progress: the content is scanned for one or two
    ## numbers (or a number followed by a percent-like sign), but the
    ## extracted values are not used for anything yet.
    my $rest = $item->{node}->text_content;
    my ($first_number, $denominator, $second_number);
    if ($rest =~ s/^([0-9.]+)//) {
      $first_number = $1;

      if ($rest =~ s/^[^0-9.]+([0-9.]+)//) {
        $second_number = $1;
      } elsif ($rest =~ s/^([\x{0025}\x{066A}\x{FE6A}\x{FF05}\x{2030}\x{2031}])//) {
        $denominator = $1;
      }
    }

    ## Digits or dots remaining after the scan discard both numbers.
    if ($rest =~ /[0-9.]/) {
      undef $first_number;
      undef $second_number;
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  }, # check_end
}; # meter
3807
## The |progress| element: phrasing content.  |max| must be a
## floating-point number greater than zero; |value| must be a
## non-negative floating-point number not exceeding |max| (which
## defaults to 1 when the attribute is absent).
# XXX warn if the value from the content is greater than |max|
# attribute value.
# XXX warn if the element content does not contain one or two numbers.
$Element->{$HTML_NS}{progress} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    ## Both attributes are validated together in |check_attrs2|.
    value => sub { },
    max => sub { },
  }, {
    %HTMLAttrStatus,
    max => FEATURE_HTML5_DEFAULT,
    value => FEATURE_HTML5_DEFAULT,
  }), # check_attrs
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};

    ## Upper bound used for the |value| range check below.
    my $upper = 1;
    if (my $max_attr = $el->get_attribute_node_ns (undef, 'max')) {
      my $check_max = $GetHTMLFloatingPointNumberAttrChecker->(sub {
        $upper = $_[0];
        return $upper > 0; ## >, not >=
      });
      $check_max->($self, $max_attr);
    }

    my $value_attr = $el->get_attribute_node_ns (undef, 'value');
    if ($value_attr) {
      ## The attribute is reported at the "should" level whenever present.
      $self->{onerror}->(node => $value_attr,
                         type => 'attribute not allowed',
                         text => 'value',
                         level => $self->{level}{should}); # RECOMMENDED

      my $check_value = $GetHTMLFloatingPointNumberAttrChecker->(sub {
        my $current = $_[0];

        if (not $current <= $upper) {
          $self->{onerror}->(node => $value_attr,
                             type => 'progress value out of range',
                             value => $upper, # XXX document error type
                             level => $self->{level}{must});
        }

        return $current >= 0; ## >=, not >
      });
      $check_value->($self, $value_attr);
    }
  }, # check_attrs2
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Register |progress| itself as a minus element while this element
    ## is open (undone in |check_end|).
    $self->_add_minus_elements
        ($element_state, {$HTML_NS => {progress => 1}});

    $HTMLPhrasingContentChecker{check_start}->(@_);
  }, # check_start
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  }, # check_end
}; # progress
3869
## The |code| element: phrasing content, no element-specific attribute
## checkers; only the attribute status table is customized.
$Element->{$HTML_NS}{code} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # code
3880
## The |var| element: phrasing content, no element-specific attribute
## checkers; only the attribute status table is customized.
$Element->{$HTML_NS}{var} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # var
3891
## The |samp| element: phrasing content, no element-specific attribute
## checkers; only the attribute status table is customized.
$Element->{$HTML_NS}{samp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # samp
3902
## The |kbd| element: phrasing content, no element-specific attribute
## checkers; only the attribute status table is customized.
$Element->{$HTML_NS}{kbd} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # kbd
3913
## The |sub| element: phrasing content, no element-specific attribute
## checkers; only the attribute status table is customized.
$Element->{$HTML_NS}{sub} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdapref => FEATURE_HTML2X_RFC,
    },
  ),
}; # sub

## |sup| shares the very same definition hash as |sub|.
$Element->{$HTML_NS}{sup} = $Element->{$HTML_NS}{sub};
3926
## The |span| element: phrasing content, no element-specific attribute
## checkers.  The |data*| attributes carry the HTML4 "reserved" status.
$Element->{$HTML_NS}{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML2X_RFC,
    },
  ),
}; # span
3940
## The |i| element: phrasing content, no element-specific attribute
## checkers; only the attribute status table is customized.
# XXX Warning for "authors are encouraged to consider whether other
# elements might be more applicable"
$Element->{$HTML_NS}{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # i

## |b| shares the very same definition hash as |i|.
$Element->{$HTML_NS}{b} = $Element->{$HTML_NS}{i};
3955
## The |tt| element: phrasing content with the M12N 1.0 REC status
## (no HTML5 status flag), no element-specific attribute checkers.
$Element->{$HTML_NS}{tt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
      sdaform => FEATURE_HTML20_RFC,
    },
  ),
}; # tt
3966
## The |s| element: phrasing content with the deprecated M12N 1.0 REC
## status, no element-specific attribute checkers.
$Element->{$HTML_NS}{s} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
}; # s

## |strike| and |u| share the very same definition hash as |s|.
$Element->{$HTML_NS}{strike} = $Element->{$HTML_NS}{s};
$Element->{$HTML_NS}{u} = $Element->{$HTML_NS}{s};
3980
## The |bdo| element: phrasing content.  After the generic attribute
## checks, a missing |dir| attribute is reported at the "must" level.
## ISSUE: The spec does not directly say that |dir| is an enumerated attr.
$Element->{$HTML_NS}{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_generic = $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_REC,
      id => FEATURE_HTML5_REC,
      style => FEATURE_HTML5_REC,
      title => FEATURE_HTML5_REC,
      lang => FEATURE_HTML5_REC,
      sdapref => FEATURE_HTML2X_RFC,
      sdasuff => FEATURE_HTML2X_RFC,
    });
    $check_generic->($self, $item, $element_state);

    ## |dir| is required on |bdo|.
    if (not $item->{node}->has_attribute_ns (undef, 'dir')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'dir',
                         level => $self->{level}{must});
    }
  }, # check_attrs
}; # bdo
4006
## The |ruby| element.  Content model: (phrasing, (rt | (rp, rt, rp)))+
##
## Children are tracked with a state machine whose current state is kept
## in |$element_state->{phase}|:
##
##   before-rb   - nothing has been seen yet
##   in-rb       - inside a ruby base (phrasing content)
##   after-rt    - after an |rt| not introduced by an |rp|
##   after-rp1   - after the |rp| that opens an (rp, rt, rp) group
##   after-rp-rt - after the |rt| following that opening |rp|
##   after-rp2   - after the |rp| that closes the group
##
## |$element_state->{has_sig}| records that an earlier ruby base had
## significant content; it is set when |has_significant| is cleared at
## the start of the next base, and either flag makes |check_end| mark
## the parent as having significant content.
$Element->{$HTML_NS}->{ruby} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_RUBY_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus, # XHTML 1.1 & XHTML 2.0 & XHTML+RDFa 1.0
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{phase} = 'before-rb';
    #$element_state->{has_sig}

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
  ## NOTE: (phrasing, (rt | (rp, rt, rp)))+
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Begin a new ruby base: remember whether the previous segment had
    ## significant content, reset the flag, and enter the |in-rb| phase.
    my $start_new_base = sub {
      if ($element_state->{has_significant}) {
        $element_state->{has_sig} = 1;
        delete $element_state->{has_significant};
      }
      $element_state->{phase} = 'in-rb';
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{phase} eq 'before-rb') {
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{phase} = 'in-rb';
      } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $child_el,
                           level => $self->{level}->{should},
                           type => 'no significant content before');
        $element_state->{phase} = 'after-rt';
      } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $child_el,
                           level => $self->{level}->{should},
                           type => 'no significant content before');
        $element_state->{phase} = 'after-rp1';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:ruby base',
                           level => $self->{level}->{must});
        $element_state->{phase} = 'in-rb';
      }
    } elsif ($element_state->{phase} eq 'in-rb') {
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        #$element_state->{phase} = 'in-rb';
      } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
        unless ($element_state->{has_significant}) {
          $self->{onerror}->(node => $child_el,
                             level => $self->{level}->{should},
                             type => 'no significant content before');
        }
        $element_state->{phase} = 'after-rt';
      } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
        unless ($element_state->{has_significant}) {
          $self->{onerror}->(node => $child_el,
                             level => $self->{level}->{should},
                             type => 'no significant content before');
        }
        $element_state->{phase} = 'after-rp1';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:ruby base',
                           level => $self->{level}->{must});
        #$element_state->{phase} = 'in-rb';
      }
    } elsif ($element_state->{phase} eq 'after-rt') {
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $start_new_base->();
      } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $child_el,
                           level => $self->{level}->{should},
                           type => 'no significant content before');
        $element_state->{phase} = 'after-rp1';
      } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $child_el,
                           level => $self->{level}->{should},
                           type => 'no significant content before');
        #$element_state->{phase} = 'after-rt';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:ruby base',
                           level => $self->{level}->{must});
        $start_new_base->();
      }
    } elsif ($element_state->{phase} eq 'after-rp1') {
      if ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
        $element_state->{phase} = 'after-rp-rt';
      } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing',
                           text => 'rt',
                           level => $self->{level}->{must});
        $element_state->{phase} = 'after-rp2';
      } else {
        ## Neither |rt| nor the closing |rp| was given.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing',
                           text => 'rt',
                           level => $self->{level}->{must});
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing',
                           text => 'rp',
                           level => $self->{level}->{must});
        unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed:ruby base',
                             level => $self->{level}->{must});
        }
        $start_new_base->();
      }
    } elsif ($element_state->{phase} eq 'after-rp-rt') {
      if ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
        $element_state->{phase} = 'after-rp2';
      } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing',
                           text => 'rp',
                           level => $self->{level}->{must});
        $self->{onerror}->(node => $child_el,
                           level => $self->{level}->{should},
                           type => 'no significant content before');
        $element_state->{phase} = 'after-rt';
      } else {
        ## The closing |rp| was not given.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing',
                           text => 'rp',
                           level => $self->{level}->{must});
        unless ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed:ruby base',
                             level => $self->{level}->{must});
        }
        $start_new_base->();
      }
    } elsif ($element_state->{phase} eq 'after-rp2') {
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $start_new_base->();
      } elsif ($child_ln eq 'rt' and $child_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $child_el,
                           level => $self->{level}->{should},
                           type => 'no significant content before');
        $element_state->{phase} = 'after-rt';
      } elsif ($child_ln eq 'rp' and $child_nsuri eq $HTML_NS) {
        $self->{onerror}->(node => $child_el,
                           level => $self->{level}->{should},
                           type => 'no significant content before');
        $element_state->{phase} = 'after-rp1';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:ruby base',
                           level => $self->{level}->{must});
        $start_new_base->();
      }
    } else {
      die "check_child_element: Bad |ruby| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only significant text affects the state machine.
    if ($has_significant) {
      if ($element_state->{phase} eq 'before-rb') {
        $element_state->{phase} = 'in-rb';
      } elsif ($element_state->{phase} eq 'in-rb') {
        #
      } elsif ($element_state->{phase} eq 'after-rt' or
               $element_state->{phase} eq 'after-rp2') {
        $element_state->{phase} = 'in-rb';
      } elsif ($element_state->{phase} eq 'after-rp1') {
        $self->{onerror}->(node => $child_node,
                           type => 'ps element missing',
                           text => 'rt',
                           level => $self->{level}->{must});
        $self->{onerror}->(node => $child_node,
                           type => 'ps element missing',
                           text => 'rp',
                           level => $self->{level}->{must});
        $element_state->{phase} = 'in-rb';
      } elsif ($element_state->{phase} eq 'after-rp-rt') {
        $self->{onerror}->(node => $child_node,
                           type => 'ps element missing',
                           text => 'rp',
                           level => $self->{level}->{must});
        $element_state->{phase} = 'in-rb';
      } else {
        die "check_child_text: Bad |ruby| phase: $element_state->{phase}";
      }
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE(review): |check_start| above does not call
    ## |_add_minus_elements|; confirm that this call is intended.
    $self->_remove_minus_elements ($element_state);

    if ($element_state->{phase} eq 'before-rb') {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing',
                         text => 'rt',
                         level => $self->{level}->{must});
    } elsif ($element_state->{phase} eq 'in-rb') {
      unless ($element_state->{has_significant}) {
        $self->{onerror}->(node => $item->{node},
                           level => $self->{level}->{should},
                           type => 'no significant content at the end');
      }
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing',
                         text => 'rt',
                         level => $self->{level}->{must});
    } elsif ($element_state->{phase} eq 'after-rt' or
             $element_state->{phase} eq 'after-rp2') {
      ## The element ends in a complete state; nothing to report.
    } elsif ($element_state->{phase} eq 'after-rp1') {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing',
                         text => 'rt',
                         level => $self->{level}->{must});
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing',
                         text => 'rp',
                         level => $self->{level}->{must});
    } elsif ($element_state->{phase} eq 'after-rp-rt') {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing',
                         text => 'rp',
                         level => $self->{level}->{must});
    } else {
      ## The message names this callback (it used to say
      ## "check_child_text", which pointed at the wrong place).
      die "check_end: Bad |ruby| phase: $element_state->{phase}";
    }

    ## NOTE: A modified version of |check_end| of %AnyChecker.
    if ($element_state->{has_significant} or $element_state->{has_sig}) {
      $item->{real_parent_state}->{has_significant} = 1;
    }
  },
}; # ruby
4272
## The |rt| element: phrasing content, no element-specific attribute
## checkers; only the attribute status table is customized.
$Element->{$HTML_NS}{rt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_RUBY_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
}; # rt
4282
## The |rp| element: phrasing content, no element-specific attribute
## checkers; only the attribute status table is customized.
$Element->{$HTML_NS}{rp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_RUBY_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element-specific checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
}; # rp
4292
4293 =pod
4294
4295 ## TODO:
4296
4297 +
4298 + <p>Partly because of the confusion described above, authors are
4299 + strongly recommended to always mark up all paragraphs with the
4300 + <code>p</code> element, and to not have any <code>ins</code> or
4301 + <code>del</code> elements that cross across any <span
4302 + title="paragraph">implied paragraphs</span>.</p>
4303 +
4304 (An informative note)
4305
4306 <p><code>ins</code> elements should not cross <span
4307 + title="paragraph">implied paragraph</span> boundaries.</p>
4308 (normative)
4309
4310 + <p><code>del</code> elements should not cross <span
4311 + title="paragraph">implied paragraph</span> boundaries.</p>
4312 (normative)
4313
4314 =cut
4315
## The |ins| element: transparent content model.  |cite| is checked as
## a URI and |datetime| as a global date and time string.
$Element->{$HTML_NS}{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record how the URI-valued attributes of this element are used.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{cite}{type}{cite} = 1;
    $uri_info->{template}{type}{resource} = 1;
    $uri_info->{ref}{type}{resource} = 1;
  },
}; # ins
4337
## The |del| element: transparent content model.  |cite| is checked as
## a URI and |datetime| as a global date and time string.  Unlike the
## inherited |check_end|, significance is never propagated to the parent.
$Element->{$HTML_NS}{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      cite => $HTMLURIAttrChecker,
      datetime => $GetDateTimeAttrChecker->('global_date_and_time_string'),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record how the URI-valued attributes of this element are used.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{cite}{type}{cite} = 1;
    $uri_info->{template}{type}{resource} = 1;
    $uri_info->{ref}{type}{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: Significantness flag does not propagate.
    ## Report only when the element has no significant content and is
    ## not being treated as transparent.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}{should},
                         type => 'no significant content');
    }
  },
}; # del
4371
## The |figure| element: flow content with an optional |legend| that
## must be either the first or the last child.
## NOTE: legend, Flow | Flow, legend?
##
## State flags kept in |$element_state|:
##   has_legend_at_first - a |legend| was seen before any other content
##   has_legend          - the most recent |legend| seen after content
##   has_non_legend      - content has been seen since the last |legend|
##   has_table / has_non_table - counters used by |check_end| to detect a
##                               figure whose only content is one |table|
$Element->{$HTML_NS}{figure} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD,
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{in_figure} = 1;
  }, # check_start
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}{must});
      $element_state->{has_non_legend} = 1;
      $element_state->{has_non_table} = 1;
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      $element_state->{has_non_table} = 1;
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| already exists; this one is extra.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:figure legend',
                           level => $self->{level}{must});
      } elsif ($element_state->{has_legend}) {
        ## The earlier trailing |legend| turns out not to be the last one.
        $self->{onerror}->(node => $element_state->{has_legend},
                           type => 'element not allowed:figure legend',
                           level => $self->{level}{must});
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        $element_state->{has_legend} = $child_el;
      } else {
        $element_state->{has_legend_at_first} = 1;
      }
      delete $element_state->{has_non_legend};
    } else {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'table') {
        $element_state->{has_table}++;
      } else {
        $element_state->{has_non_table}++;
      }
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  }, # check_child_element
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    ## Only significant text counts as content.
    if ($has_significant) {
      $element_state->{has_non_legend} = 1;
      $element_state->{has_non_table}++;
    }
  }, # check_child_text
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## A trailing |legend| followed by more content is misplaced.
    if (not $element_state->{has_legend_at_first} and
        $element_state->{has_legend} and
        $element_state->{has_non_legend}) {
      $self->{onerror}->(node => $element_state->{has_legend},
                         type => 'element not allowed:figure legend',
                         level => $self->{level}{must});
    }

    ## A figure consisting of exactly one |table| and nothing else
    ## should not have a table caption element recorded for it.
    my $table_count = $element_state->{has_table} || 0;
    if ($table_count == 1 and
        not $element_state->{has_non_table} and
        $element_state->{table_caption_element}) {
      $self->{onerror}->(node => $element_state->{table_caption_element},
                         type => 'element not allowed',
                         level => $self->{level}{should});
    }

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  }, # check_end
}; # figure
4451
## Attribute checker for attributes whose validation is not implemented:
## reports 'unknown attribute' on the attribute at the "uncertain" level.
my $AttrCheckerNotImplemented = sub {
  my ($self, $attr) = @_;
  my $level = $self->{level}{uncertain};
  $self->{onerror}->(node => $attr,
                     type => 'unknown attribute',
                     level => $level);
}; # $AttrCheckerNotImplemented
4458
## The |img| element: empty content model.  After the generic attribute
## checks, a missing |alt| is reported at the "should" level and a
## missing |src| at the "must" level.
$Element->{$HTML_NS}{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};

    my $check_generic = $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      alt => sub { }, ## NOTE: No syntactical requirement
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        ## |ismap| is reported unless the |in_a_href| flag is set.
        unless ($self->{flag}{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap',
                             level => $self->{level}{must});
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      longdesc => $HTMLURIAttrChecker,
      ## TODO: HTML4 |name|
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      src => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    });
    $check_generic->($self, $item, $element_state);

    ## Attributes expected on every |img|, with the level used to
    ## report their absence.
    ## TODO: ... (more precise |alt| handling)
    for my $expected ([alt => 'should'], [src => 'must']) {
      my ($attr_name, $level_name) = @$expected;
      next if $el->has_attribute_ns (undef, $attr_name);
      $self->{onerror}->(node => $el,
                         type => 'attribute missing',
                         text => $attr_name,
                         level => $self->{level}{$level_name});
    }

    ## TODO: external resource check

    ## Record how the URI-valued attributes of this element are used.
    my $uri_info = $element_state->{uri_info} ||= {};
    $uri_info->{src}{type}{embedded} = 1;
    $uri_info->{lowsrc}{type}{embedded} = 1;
    $uri_info->{dynsrc}{type}{embedded} = 1;
    $uri_info->{longdesc}{type}{cite} = 1;
  }, # check_attrs
}; # img
4527
## The |iframe| element: text-only content model (XXX content model
## restriction).
##
## The HTML5 boolean attribute is spelled |seamless|; it was previously
## registered here under the misspelling |seemless|, so documents using
## the correct attribute name got no element-specific checker or status.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker, # XXX content model restriction
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    name => $HTMLBrowsingContextNameAttrChecker,
    ## Unordered set of unique space-separated tokens from this list.
    sandbox => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->({
      'allow-same-origin' => 1, 'allow-forms' => 1, 'allow-scripts' => 1,
    }),
    seamless => $GetHTMLBooleanAttrChecker->('seamless'),
    src => $HTMLURIAttrChecker,
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    #name => FEATURE_HTML5_WD | FEATURE_M12N10_REC_DEPRECATED,
    name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    sandbox => FEATURE_HTML5_WD,
    scrolling => FEATURE_M12N10_REC,
    seamless => FEATURE_HTML5_WD,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_REC,
    width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Record how the URI-valued attributes of this element are used.
    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
  },
}; # iframe
4569
## The |embed| element: empty content model.  Attributes are checked by
## hand (rather than through |$GetHTMLAttrsChecker|) because any
## null-namespace attribute with a lowercase, non-|xml|-prefixed NCName
## is accepted on this element.
$Element->{$HTML_NS}{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Status table consulted by local name for every attribute.
    my %status_of = (
      %HTMLAttrStatus,
      height => FEATURE_HTML5_LC,
      src => FEATURE_HTML5_WD,
      type => FEATURE_HTML5_WD,
      width => FEATURE_HTML5_LC,
    );

    my $seen_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;

      my $checker;
      my $status = $status_of{$ln};

      if ($ns eq '') {
        if ($ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $seen_src = 1;
        } elsif ($ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } elsif ($ln eq 'width' or $ln eq 'height') {
          $checker = $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 });
        } elsif ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } elsif ($ln !~ /^[Xx][Mm][Ll]/ and
                 $ln !~ /[A-Z]/ and
                 $ln =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
          $checker = $HTMLAttrChecker->{$ln}
              || sub { }; ## NOTE: Any local attribute is ok.
          $status = FEATURE_HTML5_WD | FEATURE_ALLOWED;
        } else {
          $checker = $HTMLAttrChecker->{$ln};
        }
      }

      ## Fall back to the per-namespace tables, then to their '' entry.
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state);
      } elsif ($ns eq '' and not $status) {
        ## No checker and no status: nothing is reported here.
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}{uncertain});
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    unless ($seen_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}{info});
      ## NOTE: <embed> without src="" is allowed since revision 1929.
      ## We issue an informational message since <embed> w/o src=""
      ## is likely an authoring error.
    }

    ## TODO: external resource check

    $element_state->{uri_info}{src}{type}{embedded} = 1;
  }, # check_attrs
}; # embed
4647
4648 ## TODO:
4649 ## {applet} FEATURE_M12N10_REC_DEPRECATED
4650 ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
4651
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  ## Checks the attributes of an |object| element, then requires that
  ## at least one of |data| and |type| is specified and records how
  ## the URL-valued attributes are used.
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      align => $GetHTMLEnumeratedAttrChecker->({
        bottom => 1, middle => 1, top => 1, left => 1, right => 1,
      }),
      archive => $HTMLSpaceURIsAttrChecker,
          ## TODO: Relative to @codebase
      border => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      classid => $HTMLURIAttrChecker,
      codebase => $HTMLURIAttrChecker,
      codetype => $HTMLIMTAttrChecker,
          ## TODO: "RECOMMENDED when |classid| is specified" [HTML4]
      data => $HTMLURIAttrChecker,
      declare => $GetHTMLBooleanAttrChecker->('declare'),
          ## NOTE: "The object MUST be instantiated by a subsequent OBJECT ..."
          ## [HTML4] but we don't know how to test this.
      form => $HTMLFormAttrChecker,
      height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      hspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      name => $HTMLBrowsingContextNameAttrChecker,
          ## NOTE: |name| attribute of the |object| element defines
          ## the name of the browsing context created by the element,
          ## if any, but is also used as the form control name of the
          ## form control provided by the plugin, if any.
      standby => sub {}, ## NOTE: %Text; in HTML4
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      vspace => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
      width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    }, {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      'content-length' => FEATURE_XHTML2_ED,
      data => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      form => FEATURE_HTML5_LC,
      height => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_REC,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    })->($self, $item, $element_state);

    ## At least one of |data| and |type| has to be present.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type',
                           level => $self->{level}->{must});
      }
    }

    $element_state->{uri_info}->{data}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{classid}->{type}->{embedded} = 1;
    $element_state->{uri_info}->{codebase}->{type}->{base} = 1;
    ## TODO: archive
    $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
  },
  ## NOTE: param*, transparent (Flow)
  ##
  ## Checks a child element.  |param| children are allowed only
  ## before any other content; once |has_non_param| is set in the
  ## element state, a later |param| is reported as an error.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      ## A disallowed element is still non-|param| content.  The flag
      ## read by the |param| branch below is |has_non_param|; the
      ## previous |has_non_legend| key was never consulted here.
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
    } else {
      $HTMLFlowContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  ## Significant text counts as non-|param| content.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  ## Propagates the "has significant content" flag to the real parent
  ## state.  When the flag is not set and the element has no parent
  ## element, the lack of significant content is reported here.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{level}->{should},
                         type => 'no significant content');
    }
  },
};
4768 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
4769 ## What about |<section><object data><style scoped></style>x</object></section>|?
4770 ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
4771
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  ## Runs the per-attribute checks for |param| and then requires the
  ## |name| and |value| attributes to be present.
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checkers = {
      name => sub { },
      type => $HTMLIMTAttrChecker,
      value => sub { },
      valuetype => $GetHTMLEnumeratedAttrChecker->({
        data => 1, ref => 1, object => 1,
      }),
    };
    my $attr_statuses = {
      %HTMLAttrStatus,
      href => FEATURE_RDFA_REC,
      id => FEATURE_HTML5_REC,
      name => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      type => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_WD | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
      valuetype => FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    };
    $GetHTMLAttrsChecker->($attr_checkers, $attr_statuses)->(@_);

    ## Both attributes are mandatory; report each missing one.
    for my $required (qw/name value/) {
      next if $item->{node}->has_attribute_ns (undef, $required);
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => $required,
                         level => $self->{level}->{must});
    }
  },
};
4807
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    autobuffer => $GetHTMLBooleanAttrChecker->('autobuffer'),
    autoplay => sub {
      my ($checker, $autoplay_attr) = @_;

      ## "Authors are also encouraged to consider not using the
      ## automatic playback behavior at all" according to HTML5.
      $checker->{onerror}->(node => $autoplay_attr,
                            type => 'attribute not allowed',
                            level => $checker->{level}->{warn});

      ## Then validate it as an ordinary boolean attribute.
      my $check_boolean = $GetHTMLBooleanAttrChecker->('autoplay');
      $check_boolean->(@_);
    },
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    loop => $GetHTMLBooleanAttrChecker->('loop'),
    (map { ($_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })) }
         qw/height width/),
    (map { ($_ => $HTMLURIAttrChecker) } qw/poster src/),

    ## NOTE: |start|, |end|, |loopstart|, |loopend|, and |playcount|
    ## attributes have been deleted from the spec before the exact
    ## author requirement was defined, so their values are not checked.
    (map { ($_ => sub { }) } qw/end loopend loopstart playcount start/),
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) }
         qw/autobuffer autoplay controls height loop poster src width/),
    (map { ($_ => FEATURE_HTML5_DROPPED) }
         qw/end loopend loopstart playcount start/),
  }), # check_attrs
  ## Forbids nested |video| and |audio| elements and initializes the
  ## bookkeeping for |source| children.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {
      video => 1, audio => 1,
    }});

    ## |source| children are allowed only when there is no |src|.
    $element_state->{allow_source}
        = !$item->{node}->has_attribute_ns (undef, 'src');
    ## -1 means "a |source| child is required but not seen yet".
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;
        ## NOTE: It might be set true by |check_element|.

    $element_state->{uri_info}->{$_}->{type}->{embedded} = 1
        for qw/src poster/;
    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  }, # check_start
  ## |source| children are accepted only while |allow_source| is
  ## still set; any other content clears that flag.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
      if (not $element_state->{allow_source}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:flow',
                           level => $self->{level}->{must});
      }
      $element_state->{has_source} = 1;
    } else {
      delete $element_state->{allow_source};
      $HTMLFlowContentChecker{check_child_element}->(@_);
    }
  },
  ## Significant text ends the run of allowed |source| children.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    delete $element_state->{allow_source} if $has_significant;
    $HTMLFlowContentChecker{check_child_text}->(@_);
  },
  ## Lifts the nesting restriction, reports a missing |source| child,
  ## and finishes with the same end check as the |object| element.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing',
                         text => 'source',
                         level => $self->{level}->{must});
    }

    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
}; # video
4915
## |audio| reuses every callback of |video| except the attribute
## checker: it has no |height|, |poster| or |width| entries.
$Element->{$HTML_NS}->{audio} = {
  %{$Element->{$HTML_NS}->{video}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    autobuffer => $GetHTMLBooleanAttrChecker->('autobuffer'),
    autoplay => sub {
      my ($checker, $autoplay_attr) = @_;

      ## "Authors are also encouraged to consider not using the
      ## automatic playback behavior at all" according to HTML5.
      $checker->{onerror}->(node => $autoplay_attr,
                            type => 'attribute not allowed',
                            level => $checker->{level}->{warn});

      ## Then validate it as an ordinary boolean attribute.
      my $check_boolean = $GetHTMLBooleanAttrChecker->('autoplay');
      $check_boolean->(@_);
    },
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    loop => $GetHTMLBooleanAttrChecker->('loop'),
    src => $HTMLURIAttrChecker,

    ## NOTE: |start|, |end|, |loopstart|, |loopend|, and |playcount|
    ## attributes have been deleted from the spec before the exact
    ## author requirement was defined, so their values are not checked.
    (map { ($_ => sub { }) } qw/end loopend loopstart playcount start/),
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_LC) }
         qw/autobuffer autoplay controls loop src/),
    (map { ($_ => FEATURE_HTML5_DROPPED) }
         qw/end loopend loopstart playcount start/),
  }), # check_attrs
}; # audio
4958
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  ## Runs the per-attribute checks for |source|, requires |src|, and
  ## marks |src| as referring to an embedded resource.
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checkers = {
      media => $HTMLMQAttrChecker,
      pixelratio => $PositiveFloatingPointNumberAttrChecker,
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
    };
    my $attr_statuses = {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_HTML5_LC) } qw/media src type/),
      pixelratio => FEATURE_HTML5_DROPPED,
    };
    $GetHTMLAttrsChecker->($attr_checkers, $attr_statuses)->(@_);

    if (not $item->{node}->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{src}->{type}->{embedded} = 1;

    ## NOTE: The |pixelratio| attribute should have been forbidden
    ## when the parent of the |source| element is an |audio| element,
    ## but the attribute itself has been dropped from the spec.
  },
}; # source
4990
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  ## |height| and |width| are the only element-specific attributes;
  ## both take a non-negative integer.
  check_attrs => $GetHTMLAttrsChecker->({
    (map { ($_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })) }
         qw/height width/),
  }, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_HTML5_REC) } qw/height width/),
  }),

  # Authors MUST provide alternative content (HTML5 revision 2868) -
  # This requirement cannot be checked, since the alternative content
  # might be placed outside of the element.
}; # canvas
5007
$Element->{$HTML_NS}->{map} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  ## Checks the attributes of a |map| element.  |name| is required,
  ## must be non-empty and free of space characters, and must equal
  ## |id| when |id| is also specified.
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Set by the |name| checker to [value, attribute node].
    my $name_info;

    my $check_name = sub {
      my ($self, $attr) = @_;
      my $name = $attr->value;
      if ($name eq '') {
        $self->{onerror}->(node => $attr,
                           type => 'empty attribute value',
                           level => $self->{level}->{must});
      } else {
        if ($name =~ /[\x09\x0A\x0C\x0D\x20]/) {
          $self->{onerror}->(node => $attr, type => 'space in map name',
                             level => $self->{level}->{must}); ## XXX documentation
        }

        ## XXXNOTE: Duplication is not non-conforming.
      }
      ## Remember the first |map| seen for each name.
      $self->{map}->{$name} ||= $attr;
      $name_info = [$name, $attr];
    };

    $GetHTMLAttrsChecker->({
      name => $check_name,
    }, {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      (map { ($_ => FEATURE_HTML5_REC) } qw/dir id lang title/),
      #name => FEATURE_HTML5_LC | FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      (map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) }
           qw/onclick ondblclick onmousedown onmouseup onmouseover
              onmousemove onmouseout onkeypress onkeydown onkeyup/),
    })->(@_);

    if (not $name_info) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'name',
                         level => $self->{level}->{must});
    } else {
      my $id = $item->{node}->get_attribute_ns (undef, 'id');
      if (defined $id and $name_info->[0] ne $id) {
        $self->{onerror}
            ->(node => $item->{node}->get_attribute_node_ns (undef, 'id'),
               type => 'id ne name',
               level => $self->{level}->{must});
      }
    }
  },
  ## Pushes a fresh per-|map| hash onto the |in_map| stack, keeping
  ## the previous stack so that |check_end| can restore it.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $outer_maps = $self->{flag}->{in_map};
    $element_state->{in_map_original} = $outer_maps;
    $self->{flag}->{in_map} = [@{$outer_maps or []}, {}];
        ## NOTE: |{in_map}| is a reference to the array which contains
        ## hash references.  Hashes are corresponding to the opening
        ## |map| elements and each of them contains the key-value
        ## pairs corresponding to the absolute URLs for the processed
        ## |area| elements in the |map| element corresponding to the
        ## hash.  The key represents the resource (## TODO: use
        ## absolute URL), while the value represents whether there is
        ## an |area| element whose |alt| attribute is specified to a
        ## non-empty value.  If there IS such an |area| element for
        ## the resource specified by the key, then the value is set to
        ## zero (|0|).  Otherwise, if there is no such |area|
        ## element but there is any |area| element with the empty
        ## |alt=""| attribute, then the value contains an array
        ## reference that contains all of such |area| elements.

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
  ## Reports every empty |alt| recorded for a URL that never got a
  ## non-empty |alt| in this |map|, then restores the outer stack.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    for my $url (keys %{$self->{flag}->{in_map}->[-1]}) {
      my $empty_alts = $self->{flag}->{in_map}->[-1]->{$url};
      next unless $empty_alts;
      for my $alt_attr (@$empty_alts) {
        $self->{onerror}->(type => 'empty area alt',
                           node => $alt_attr,
                           level => $self->{level}->{html5_no_may});
      }
    }

    $self->{flag}->{in_map} = $element_state->{in_map_original};

    $HTMLFlowContentChecker{check_end}->(@_);
  },
};
5109
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  ## Checks the attributes of an |area| element in three steps:
  ## per-attribute syntax, the hyperlink-related attributes (|href|,
  ## |alt| and friends), and the |shape| / |coords| combination.
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Element-specific attributes seen, keyed by local name.
    my %seen_attr;
    ## Parsed |coords| values; stays undefined on a syntax error.
    my $coords;

    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;
      my ($checker, $status);

      if ($ns eq '') {
        $status = {
          %HTMLAttrStatus,
          %HTMLM12NCommonAttrStatus,
          accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
          alt => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          coords => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          href => FEATURE_HTML5_WD | FEATURE_RDFA_REC | FEATURE_M12N10_REC,
          hreflang => FEATURE_HTML5_WD,
          lang => FEATURE_HTML5_REC,
          media => FEATURE_HTML5_WD,
          nohref => FEATURE_M12N10_REC,
          onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          ping => FEATURE_HTML5_WD,
          rel => FEATURE_HTML5_WD | FEATURE_RDFA_REC,
          shape => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
          tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          target => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
          type => FEATURE_HTML5_WD,
        }->{$ln};

        $checker = {
          alt => sub {
            ## NOTE: Checked later.
          },
          shape => $GetHTMLEnumeratedAttrChecker->({
            circ => -1, circle => 1,
            default => 1,
            poly => 1, polygon => -1,
            rect => 1, rectangle => -1,
          }),
          coords => sub {
            my ($self, $attr) = @_;
            my $value = $attr->value;
            if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
              $coords = [split /,/, $value];
            } else {
              $self->{onerror}->(node => $attr,
                                 type => 'coords:syntax error',
                                 level => $self->{level}->{must});
            }
          },
          nohref => $GetHTMLBooleanAttrChecker->('nohref'),
          target => $HTMLTargetAttrChecker,
          href => $HTMLURIAttrChecker,
          ping => $HTMLSpaceURIsAttrChecker,
          rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
          media => $HTMLMQAttrChecker,
          hreflang => $HTMLLanguageTagAttrChecker,
          type => $HTMLIMTAttrChecker,
        }->{$ln};

        if ($checker) {
          ## Remembered for the cross-attribute checks below.
          $seen_attr{$ln} = $attr;
        } elsif ($ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
                 $ln !~ /[A-Z]/) {
          $checker = $HTMLDatasetAttrChecker;
          $status = $HTMLDatasetAttrStatus;
        } else {
          $checker = $HTMLAttrChecker->{$ln};
        }
      }

      ## Fall back to the per-namespace tables, then to the
      ## namespace-wide default entry (the empty local name).
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};
      $status ||= $AttrStatus->{$ns}->{$ln}
          || $AttrStatus->{$ns}->{''};
      $status = FEATURE_ALLOWED if not defined $status and length $ns;

      if ($checker) {
        $checker->($self, $attr, $item, $element_state) if ref $checker;
      } elsif ($ns eq '' and not $status) {
        #
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'unknown attribute',
                           level => $self->{level}->{uncertain});
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $status);
    }

    if (defined $seen_attr{href}) {
      $self->{has_hyperlink_element} = 1;
      if (not defined $seen_attr{alt}) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => 'alt',
                           level => $self->{level}->{must});
      } else {
        my $url = $seen_attr{href}->value; ## TODO: resolve
        if (length $seen_attr{alt}->value) {
          ## A non-empty |alt| settles this URL in every open |map|.
          for my $map_state (@{$self->{flag}->{in_map} or []}) {
            $map_state->{$url} = 0;
          }
        } else {
          ## NOTE: Empty |alt=""|.  If there is another |area| element
          ## with the same |href=""| and that |area| element's
          ## |alt=""| attribute is not an empty string, then this
          ## is conforming.
          for my $map_state (@{$self->{flag}->{in_map} or []}) {
            next if exists $map_state->{$url} and not $map_state->{$url};
            push @{$map_state->{$url} ||= []}, $seen_attr{alt};
          }
        }
      }
    } else {
      ## Without |href|, the hyperlink-related attributes are errors.
      for my $name (qw/target ping rel media hreflang type alt/) {
        next unless defined $seen_attr{$name};
        $self->{onerror}->(node => $seen_attr{$name},
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    }

    ## Map the |shape| keyword to its canonical state.  A missing or
    ## unrecognized value means the rectangle state.
    my %shape_state = (
      circ => 'circle', circle => 'circle',
      default => 'default',
      poly => 'polygon', polygon => 'polygon',
      rect => 'rectangle', rectangle => 'rectangle',
    );
    my $shape = 'rectangle';
    $shape = $shape_state{lc $seen_attr{shape}->value} || 'rectangle'
        if defined $seen_attr{shape};
    ## TODO: ASCII lowercase?

    my $coords_attr = $seen_attr{coords};

    ## Reports that the number of |coords| values is wrong.
    my $count_error = sub {
      my ($type) = @_;
      $self->{onerror}->(node => $coords_attr,
                         type => $type,
                         text => 0+@$coords,
                         level => $self->{level}->{must});
    };
    ## Reports that the |coords| value at the given index is invalid.
    my $range_error = sub {
      my ($index) = @_;
      $self->{onerror}->(node => $coords_attr,
                         type => 'coords:out of range',
                         index => $index,
                         value => $coords->[$index],
                         level => $self->{level}->{must});
    };

    ## |$shape| is always one of the four states at this point.
    if ($shape eq 'default') {
      if (defined $coords_attr) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'attribute not allowed',
                           level => $self->{level}->{must});
      }
    } elsif (not defined $coords_attr) {
      ## Every other shape requires |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing',
                         text => 'coords',
                         level => $self->{level}->{must});
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      if (@$coords != 3) {
        $count_error->('coords:number not 3');
      } elsif ($coords->[2] < 0) {
        $range_error->(2);
      }
    } elsif ($shape eq 'polygon') {
      if (@$coords < 6) {
        $count_error->('coords:number lt 6');
      } elsif (@$coords % 2 != 0) {
        $count_error->('coords:number not even');
      }
    } elsif ($shape eq 'rectangle') {
      if (@$coords != 4) {
        $count_error->('coords:number not 4');
      } else {
        $range_error->(0) unless $coords->[0] < $coords->[2];
        $range_error->(1) unless $coords->[1] < $coords->[3];
      }
    }

    $element_state->{uri_info}->{href}->{type}->{hyperlink} = 1;
  },
  ## An |area| with a parent element is reported as an error when no
  ## |in_map| flag is set.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    if (not $self->{flag}->{in_map} and
        $item->{node}->manakai_parent_element) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element not allowed:area',
                         level => $self->{level}->{must});
    }

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/template ref/;
  },
};
5357
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    (map { ($_ => $HTMLLengthAttrChecker) } qw/cellpadding cellspacing/),
    frame => $GetHTMLEnumeratedAttrChecker->({
      void => 1, above => 1, below => 1, hsides => 1, vsides => 1,
      lhs => 1, rhs => 1, box => 1, border => 1,
    }),
    rules => $GetHTMLEnumeratedAttrChecker->({
      none => 1, groups => 1, rows => 1, cols => 1, all => 1,
    }),
    summary => sub {}, ## NOTE: %Text; in HTML4.
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), ## %Pixels;
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) } qw/align bgcolor/),
    (map { ($_ => FEATURE_M12N10_REC) }
         qw/border cellpadding cellspacing datapagesize
            frame rules summary width/),
    (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
         qw/datafld dataformatas datasrc/),
    cols => FEATURE_RFC1942,
    lang => FEATURE_HTML5_REC,
  }),
  ## Starts the content-model state machine in its initial phase.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/datasrc template ref/;
  },
  ## Enforces the order of the |table| children with a small state
  ## machine whose current state is |$element_state->{phase}|.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      ## phase => { allowed HTML child => next phase }
      my %next_phase_for = (
        'before caption' => {
          caption => 'in colgroup', colgroup => 'in colgroup',
          thead => 'after thead', tbody => 'in tbodys', tr => 'in trs',
          tfoot => 'in tbodys',
        },
        'in colgroup' => {
          colgroup => 'in colgroup',
          thead => 'after thead', tbody => 'in tbodys', tr => 'in trs',
          tfoot => 'in tbodys',
        },
        'after thead' => {
          tbody => 'in tbodys', tr => 'in trs', tfoot => 'in tbodys',
        },
        'in tbodys' => {tbody => 'in tbodys', tfoot => 'after tfoot'},
        'in trs' => {tr => 'in trs', tfoot => 'after tfoot'},
        'after tfoot' => {},
      );

      my $moves = $next_phase_for{$element_state->{phase}};
      die "check_child_element: Bad |table| phase: $element_state->{phase}"
          unless $moves;

      my $next = $child_nsuri eq $HTML_NS ? $moves->{$child_ln} : undef;

      ## A trailing |tfoot| is allowed only if no |tfoot| appeared
      ## before the table body.
      undef $next
          if defined $next and $next eq 'after tfoot' and
             $element_state->{has_tfoot};

      if (defined $next) {
        if ($child_ln eq 'caption') {
          $item->{parent_state}->{table_caption_element} = $child_el;
        }
        $element_state->{phase} = $next;
        $element_state->{has_tfoot} = 1 if $child_ln eq 'tfoot';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    }
  },
  ## Significant text is not allowed directly in a |table|.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must});
  },
  ## Forms the table and assigns header cells, reporting errors via
  ## the |onerror| handler; the result is appended to the |table|
  ## list of the return value.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    my $report = sub {
      $self->{onerror}->(@_);
    };
    my $table = Whatpm::HTMLTable->form_table
        ($item->{node}, $report, $self->{level});
    Whatpm::HTMLTable->assign_header
        ($table, $self->{onerror}, $self->{level});
    push @{$self->{return}->{table}}, $table;

    $HTMLChecker{check_end}->(@_);
  },
};
5508
## |caption| -- checked as flow content.  On start, |table| is handed to
## |_add_minus_elements| (presumably so that a table nested in the
## caption is reported as excluded); on end, |_remove_minus_elements|
## undoes that registration.
$Element->{$HTML_NS}->{caption} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        top => 1,
        bottom => 1,
        left => 1,
        right => 1,
      }),
    },
    ## Attribute statuses (explicit entries override the common tables)
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_start => sub {
    my ($self, undef, $caption_state) = @_;

    my $excluded = {$HTML_NS => {table => 1}};
    $self->_add_minus_elements ($caption_state, $excluded);

    ## Delegate the rest to the generic flow content checker.
    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_end => sub {
    my ($self, undef, $caption_state) = @_;

    $self->_remove_minus_elements ($caption_state);

    ## Delegate the rest to the generic flow content checker.
    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # caption
5535
## Attribute checkers for the HTML4 cell alignment attributes.  The
## hash is spliced into the attribute checker tables of the
## table-related element definitions that follow.
my %cellalign = (
  ## HTML4 %cellhalign;
  align => $GetHTMLEnumeratedAttrChecker->(+{
    map { $_ => 1 } qw/left center right justify char/
  }),
  char => sub {
    my ($self, $attr) = @_;

    ## NOTE: "character" or |%Character;| in HTML4.

    ## Anything other than exactly one character is reported.
    $self->{onerror}->(node => $attr, type => 'char:syntax error',
                       level => $self->{level}->{html4_fact})
        if length ($attr->value) != 1;
  },
  charoff => $HTMLLengthAttrChecker,

  ## HTML4 %cellvalign;
  valign => $GetHTMLEnumeratedAttrChecker->(+{
    map { $_ => 1 } qw/top middle bottom baseline/
  }),
);
5559
## |colgroup| -- the only element children accepted without an error
## are |col| elements and anything currently listed in |plus_elements|.
## Significant character data is an error.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      %cellalign,
      ## The extra predicate only accepts values greater than zero.
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
    },
    ## Attribute statuses (explicit entries override the common tables)
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { $_ => FEATURE_M12N10_REC } qw/align char charoff valign width/),
      lang => FEATURE_HTML5_REC,
      span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln) = @_;

    ## Listed in |minus_elements| and classified as interactive content.
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }

    ## Explicitly included elements and HTML |col| need no report.
    return if $self->{plus_elements}->{$ns}->{$ln};
    return if $ns eq $HTML_NS and $ln eq 'col';

    $self->{onerror}->(node => $child, type => 'element not allowed',
                       level => $self->{level}->{must});
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
5606
## |col| -- uses the empty-element checker as is; only the attribute
## tables are specific to this element.
$Element->{$HTML_NS}->{col} = do {
  ## Attribute value checkers
  my %attr_checker = (
    %cellalign,
    ## The extra predicate only accepts values greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  );

  ## Attribute statuses (explicit entries override the common tables)
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { $_ => FEATURE_M12N10_REC } qw/align char charoff valign width/),
    lang => FEATURE_HTML5_REC,
    span => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC,
  );

  +{
    %HTMLEmptyChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
};
5625
## |tbody| -- the only element children accepted without an error are
## |tr| elements and anything currently listed in |plus_elements|.
## Significant character data is an error.  This definition is copied
## by the |thead| and |tfoot| definitions.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      %cellalign,
    },
    ## Attribute statuses (explicit entries override the common tables)
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { $_ => FEATURE_M12N10_REC } qw/align char charoff valign/),
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln) = @_;

    ## Listed in |minus_elements| and classified as interactive content.
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }

    ## Explicitly included elements and HTML |tr| need no report.
    return if $self->{plus_elements}->{$ns}->{$ln};
    return if $ns eq $HTML_NS and $ln eq 'tr';

    $self->{onerror}->(node => $child, type => 'element not allowed',
                       level => $self->{level}->{must});
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
5665
## |thead| -- a copy of the |tbody| definition whose |check_start| also
## sets the |in_thead| flag in the element state.  The |tr| checker
## reads that flag through |parent_state| (presumably this element's
## state) to reject |td| children.
$Element->{$HTML_NS}->{thead} = {
  %{$Element->{$HTML_NS}->{tbody}},
  check_start => sub {
    my (undef, undef, $thead_state) = @_;

    $thead_state->{in_thead} = 1;

    ## Delegate the rest to the generic HTML checker.
    $HTMLChecker{check_start}->(@_);
  }, # check_start
}; # thead
5675
## |tfoot| -- an unmodified shallow copy of the |tbody| definition.
$Element->{$HTML_NS}->{tfoot} = +{%{$Element->{$HTML_NS}->{tbody}}};
5679
## |tr| -- the element children accepted are |td| and |th| (plus
## anything currently listed in |plus_elements|).  A |td| is reported
## when the parent state carries the |in_thead| flag.  Significant
## character data is an error.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      %cellalign,
      bgcolor => $HTMLColorAttrChecker,
    },
    ## Attribute statuses (explicit entries override the common tables)
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      (map { $_ => FEATURE_M12N10_REC } qw/align char charoff valign/),
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln) = @_;

    ## Listed in |minus_elements| and classified as interactive content.
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      return;
    }

    ## Explicitly included elements need no report.
    return if $self->{plus_elements}->{$ns}->{$ln};

    if ($ns eq $HTML_NS) {
      if ($ln eq 'td') {
        ## A data cell in a row whose parent state is flagged |in_thead|.
        $self->{onerror}->(node => $child, # XXX document the error type
                           type => 'element not allowed:thead td',
                           level => $self->{level}->{must})
            if $item->{parent_state}->{in_thead};
        return;
      }
      return if $ln eq 'th';
    }

    $self->{onerror}->(node => $child, type => 'element not allowed',
                       level => $self->{level}->{must});
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $text_node, type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
5727
## |td| -- checked as flow content; only the attribute tables are
## specific to this element.
$Element->{$HTML_NS}->{td} = do {
  ## Extra predicate for |colspan| and |rowspan|: greater than zero.
  my $is_positive = sub { $_[0] > 0 };

  ## Attribute value checkers
  my %attr_checker = (
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->($is_positive),
    headers => sub {
      ## NOTE: Will be checked by Whatpm::HTMLTable->assign_header.
      ## Though that method does not check the |headers| attribute of a
      ## |td| element if the element does not form a table, in that case
      ## the |td| element is non-conforming anyway.
    },
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->($is_positive),
    scope => $GetHTMLEnumeratedAttrChecker->({
      row => 1, col => 1, rowgroup => 1, colgroup => 1,
    }),
  );

  ## Attribute statuses, grouped by status value (explicit entries
  ## override the common tables).
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { $_ => FEATURE_M12N10_REC } qw/align char charoff valign/),
    (map { $_ => FEATURE_M12N10_REC_DEPRECATED }
         qw/bgcolor height nowrap width/),
    (map { $_ => FEATURE_XHTML2_ED | FEATURE_M12N10_REC }
         qw/abbr axis scope/),
    (map { $_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC }
         qw/colspan headers rowspan/),
    lang => FEATURE_HTML5_REC,
  );

  +{
    %HTMLFlowContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
};
5767
## |th| -- checked as phrasing content; only the attribute tables are
## specific to this element.  Unlike |td|, no value checker is
## registered for |headers| here.
$Element->{$HTML_NS}->{th} = do {
  ## Extra predicate for |colspan| and |rowspan|: greater than zero.
  my $is_positive = sub { $_[0] > 0 };

  ## Attribute value checkers
  my %attr_checker = (
    %cellalign,
    abbr => sub {}, ## NOTE: HTML4 %Text; and SHOULD be short.
    axis => sub {}, ## NOTE: HTML4 "cdata", comma-separated
    bgcolor => $HTMLColorAttrChecker,
    colspan => $GetHTMLNonNegativeIntegerAttrChecker->($is_positive),
    ## TODO: HTML4(?) |headers|
    nowrap => $GetHTMLBooleanAttrChecker->('nowrap'),
    rowspan => $GetHTMLNonNegativeIntegerAttrChecker->($is_positive),
    scope => $GetHTMLEnumeratedAttrChecker->({
      row => 1, col => 1, rowgroup => 1, colgroup => 1,
    }),
  );

  ## Attribute statuses, grouped by status value (explicit entries
  ## override the common tables).
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    (map { $_ => FEATURE_M12N10_REC } qw/align char charoff valign/),
    (map { $_ => FEATURE_M12N10_REC_DEPRECATED }
         qw/bgcolor height nowrap width/),
    (map { $_ => FEATURE_XHTML2_ED | FEATURE_M12N10_REC } qw/abbr axis/),
    (map { $_ => FEATURE_HTML5_LC | FEATURE_XHTML2_ED | FEATURE_M12N10_REC }
         qw/colspan headers rowspan/),
    lang => FEATURE_HTML5_REC,
    scope => FEATURE_HTML5_REC,
  );

  +{
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
};
5802
## |form| -- checked as flow content.  On start, |form| is handed to
## |_add_minus_elements| (presumably so that a nested form is reported
## as excluded) and the URI attribute types and |id_type| of the element
## state are set up; on end, |_remove_minus_elements| undoes the
## registration.
$Element->{$HTML_NS}->{form} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      accept => $AcceptAttrChecker,
      'accept-charset' => $HTMLCharsetsAttrChecker,
      action => $HTMLURIAttrChecker, ## TODO: Warn if submission is not defined for the scheme
      autocomplete => $GetHTMLEnumeratedAttrChecker->({on => 1, off => 1}),
      data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
      enctype => $GetHTMLEnumeratedAttrChecker->({
        'application/x-www-form-urlencoded' => 1,
        'multipart/form-data' => 1,
        'text/plain' => 1,
      }),
      method => $GetHTMLEnumeratedAttrChecker->({
        get => 1, post => 1, put => 1, delete => 1,
      }),
      name => sub {
        my ($self, $attr) = @_;

        ## A form name has to be non-empty and must not have been
        ## recorded in |$self->{form}| before.
        my $form_name = $attr->value;
        if ($form_name eq '') {
          $self->{onerror}->(type => 'empty form name',
                             node => $attr,
                             level => $self->{level}->{must});
        } elsif ($self->{form}->{$form_name}) {
          $self->{onerror}->(type => 'duplicate form name',
                             node => $attr,
                             value => $form_name,
                             level => $self->{level}->{must});
        } else {
          $self->{form}->{$form_name} = 1;
        }
      },
      novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
      onformchange => $HTMLEventHandlerAttrChecker,
      onforminput => $HTMLEventHandlerAttrChecker,
      onreceived => $HTMLEventHandlerAttrChecker,
      replace => $GetHTMLEnumeratedAttrChecker->({
        document => 1, values => 1,
      }),
      target => $HTMLTargetAttrChecker,
    },
    ## Attribute statuses (explicit entries override the common tables)
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_M12N10_REC,
      'accept-charset' => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      action => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      autocomplete => FEATURE_HTML5_WD,
      data => FEATURE_WF2,
      enctype => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_REC,
      method => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
      #name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
      name => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
      novalidate => FEATURE_HTML5_LC,
      onformchange => FEATURE_WF2_INFORMATIVE,
      onforminput => FEATURE_WF2_INFORMATIVE,
      onreceived => FEATURE_WF2,
      onreset => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onsubmit => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      replace => FEATURE_WF2,
      sdapref => FEATURE_HTML20_RFC,
      sdasuff => FEATURE_HTML20_RFC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
  check_start => sub {
    my ($self, undef, $form_state) = @_;

    $self->_add_minus_elements ($form_state, {$HTML_NS => {form => 1}});

    ## |action| is typed as an action URI; the other URI attributes
    ## are typed as resources.
    $form_state->{uri_info}->{action}->{type}->{action} = 1;
    $form_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw/data template ref/;
    $form_state->{id_type} = 'form';
  },
  check_end => sub {
    my ($self, undef, $form_state) = @_;

    $self->_remove_minus_elements ($form_state);

    ## Delegate the rest to the generic flow content checker.
    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # form
5888
## |fieldset| -- a |legend| followed by flow content.  The element state
## tracks two flags: |has_legend| (a |legend| child was seen) and
## |has_non_legend| (some content was seen after which a |legend| is
## no longer accepted).  A missing |legend| is reported at the end.
$Element->{$HTML_NS}->{fieldset} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    ## Attribute value checkers
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      form => $HTMLFormAttrChecker,
      name => $FormControlNameAttrChecker,
    },
    ## Attribute statuses (explicit entries override the common tables)
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      disabled => FEATURE_HTML5_WD | FEATURE_WF2X,
      form => FEATURE_HTML5_LC | FEATURE_WF2X,
      lang => FEATURE_HTML5_REC,
      name => FEATURE_HTML5_LC,
    },
  ),
  ## NOTE: legend, Flow
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln,
        $child_is_transparent, $fieldset_state) = @_;

    ## Listed in |minus_elements| and classified as interactive content.
    if ($self->{minus_elements}->{$ns}->{$ln} and
        $IsInHTMLInteractiveContent->($child, $ns, $ln)) {
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
      $fieldset_state->{has_non_legend} = 1;
      return;
    }

    ## Explicitly included elements do not affect the flags.
    return if $self->{plus_elements}->{$ns}->{$ln};

    if ($ns eq $HTML_NS and $ln eq 'legend') {
      ## A |legend| is reported once other content (or an earlier
      ## |legend|) has been seen.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:details legend',
                         level => $self->{level}->{must})
          if $fieldset_state->{has_non_legend};
      $fieldset_state->{has_legend} = 1;
      $fieldset_state->{has_non_legend} = 1;
      return;
    }

    ## Anything else is checked as flow content.
    $HTMLFlowContentChecker{check_child_element}->(@_);
    $fieldset_state->{has_non_legend} = 1 unless $child_is_transparent;
    ## TODO:
    ## |<fieldset><object><legend>xx</legend></object>..</fieldset>|
    ## should be an error, since |object| is allowed as flow,
    ## therefore |details| part of the content model does not match.
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $fieldset_state) = @_;

    ## Significant text counts as non-|legend| content.
    $fieldset_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $fieldset_state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing',
                       text => 'legend',
                       level => $self->{level}->{must})
        unless $fieldset_state->{has_legend};

    $HTMLFlowContentChecker{check_end}->(@_);
    ## ISSUE: |<fieldset><legend>aa</legend></fieldset>| error?
  },
  ## NOTE: This definition is partially reused by |details| element's
  ## checker.
};
5955
5956 $Element->{$HTML_NS}->{input} = {
5957 %HTMLEmptyChecker,
5958 status => FEATURE_HTML5_WD | FEATURE_WF2X | FEATURE_M12N10_REC,
5959 check_attrs => sub {
5960 my ($self, $item, $element_state) = @_;
5961
5962 my $state = $item->{node}->get_attribute_ns (undef, 'type');
5963 $state = 'text' unless defined $state;
5964 $state =~ tr/A-Z/a-z/; ## ASCII case-insensitive
5965
5966 for my $attr (@{$item->{node}->attributes}) {
5967 my $attr_ns = $attr->namespace_uri;
5968 $attr_ns = '' unless defined $attr_ns;
5969 my $attr_ln = $attr->manakai_local_name;
5970 my $checker;
5971 my $status;
5972 if ($attr_ns eq '') {
5973 $status =
5974 {
5975 %HTMLAttrStatus,
5976 %HTMLM12NCommonAttrStatus,
5977 accept => FEATURE_HTML5_DEFAULT | FEATURE_WF2X | FEATURE_M12N10_REC,
5978 'accept-charset' => FEATURE_HTML2X_RFC,
5979 accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
5980 action => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
5981 align => FEATURE_M12N10_REC_DEPRECATED,
5982 alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
5983 autocomplete => FEATURE_HTML5_LC | FEATURE_WF2X,
5984 autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
5985 checked => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
5986 datafld => FEATURE_HTML4_REC_RESERVED,
5987 dataformatas => FEATURE_HTML4_REC_RESERVED,
5988 datasrc => FEATURE_HTML4_REC_RESERVED,
5989 disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
5990 enctype => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
5991 form => FEATURE_HTML5_LC | FEATURE_WF2X,
5992 formaction => FEATURE_HTML5_LC,
5993 formenctype => FEATURE_HTML5_LC,
5994 formmethod => FEATURE_HTML5_LC,
5995 formnovalidate => FEATURE_HTML5_LC,
5996 formtarget => FEATURE_HTML5_LC,
5997 height => FEATURE_HTML5_LC,
5998 inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X |
5999 FEATURE_XHTMLBASIC11_CR,
6000 ismap => FEATURE_M12N10_REC,
6001 lang => FEATURE_HTML5_REC,
6002 list => FEATURE_HTML5_LC | FEATURE_WF2X,
6003 max => FEATURE_HTML5_LC | FEATURE_WF2X,
6004 maxlength => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
6005 method => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6006 min => FEATURE_HTML5_LC | FEATURE_WF2X,
6007 multiple => FEATURE_HTML5_LC,
6008 name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
6009 novalidate => FEATURE_HTML5_DROPPED,
6010 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6011 onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6012 onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6013 onformchange => FEATURE_WF2_INFORMATIVE,
6014 onforminput => FEATURE_WF2_INFORMATIVE,
6015 oninput => FEATURE_WF2,
6016 oninvalid => FEATURE_WF2,
6017 onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6018 pattern => FEATURE_HTML5_LC | FEATURE_WF2X,
6019 placeholder => FEATURE_HTML5_LC,
6020 readonly => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
6021 replace => FEATURE_WF2,
6022 required => FEATURE_HTML5_LC | FEATURE_WF2X,
6023 sdapref => FEATURE_HTML20_RFC,
6024 size => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
6025 src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6026 step => FEATURE_HTML5_LC | FEATURE_WF2X,
6027 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
6028 target => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
6029 template => FEATURE_HTML5_DROPPED | FEATURE_WF2,
6030 type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
6031 usemap => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
6032 value => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
6033 width => FEATURE_HTML5_LC,
6034 }->{$attr_ln};
6035
6036 $checker =
6037 {
6038 ## NOTE: Value of an empty string means that the attribute is only
6039 ## applicable for a specific set of states.
6040 accept => '',
6041 'accept-charset' => $HTMLCharsetsAttrChecker,
6042 ## NOTE: To which states it applies is not defined in RFC 2070.
6043 action => '',
6044 align => '',
6045 alt => '',
6046 autocomplete => '',
6047 autofocus => $AutofocusAttrChecker,
6048 ## NOTE: <input type=hidden disabled> is not disallowed.
6049 checked => '',
6050 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
6051 ## NOTE: <input type=hidden disabled> is not disallowed.
6052 enctype => '',
6053 form => $HTMLFormAttrChecker,
6054 formaction => '',
6055 formenctype => '',
6056 formmethod => '',
6057 formnovalidate => '',
6058 formtarget => '',
6059 height => '',
6060 inputmode => '',
6061 ismap => '', ## NOTE: "MUST" be type=image [HTML4]
6062 list => '',
6063 max => '',
6064 maxlength => '',
6065 method => '',
6066 min => '',
6067 multiple => '',
6068 name => $FormControlNameAttrChecker,
6069 novalidate => '',
6070 onformchange => $HTMLEventHandlerAttrChecker, # [WF2]
6071 onforminput => $HTMLEventHandlerAttrChecker, # [WF2]
6072 oninput => $HTMLEventHandlerAttrChecker, # [WF2]
6073 oninvalid => $HTMLEventHandlerAttrChecker, # [WF2]
6074 ## TODO: tests for four attributes above
6075 pattern => '',
6076 placeholder => '',
6077 readonly => '',
6078 replace => '',
6079 required => '',
6080 size => '',
6081 src => '',
6082 step => '',
6083 target => '',
6084 type => $GetHTMLEnumeratedAttrChecker->({
6085 hidden => 1, text => 1, search => 1, url => 1,
6086 tel => 1, email => 1, password => 1,
6087 datetime => 1, date => 1, month => 1, week => 1, time => 1,
6088 'datetime-local' => 1, number => 1, range => 1, color => 1,
6089 checkbox => 1,
6090 radio => 1, file => 1, submit => 1, image => 1, reset => 1,
6091 button => 1,
6092 }),
6093 usemap => '',
6094 value => '',
6095 width => '',
6096 }->{$attr_ln};
6097
6098 ## State-dependent checkers
6099 unless ($checker) {
6100 if ($state eq 'hidden') {
6101 $checker =
6102 {
6103 value => sub {
6104 my ($self, $attr, $item, $element_state) = @_;
6105 my $name = $item->{node}->get_attribute_ns (undef, 'name');
6106 if (defined $name and $name eq '_charset_') { ## case-sensitive
6107 $self->{onerror}->(node => $attr,
6108 type => '_charset_ value',
6109 level => $self->{level}->{must});
6110 }
6111 },
6112 }->{$attr_ln} || $checker;
6113 ## TODO: Warn if no name attribute?
6114 ## TODO: Warn if name!=_charset_ and no value attribute?
6115 } elsif ({
6116 datetime => 1, date => 1, month => 1, time => 1,
6117 week => 1, 'datetime-local' => 1,
6118 }->{$state}) {
6119 my $v = {
6120 datetime => ['global_date_and_time_string'],
6121 date => ['date_string'],
6122 month => ['month_string'],
6123 week => ['week_string'],
6124 time => ['time_string'],
6125 'datetime-local' => ['local_date_and_time_string'],
6126 }->{$state};
6127 $checker =
6128 {
6129 autocomplete => $GetHTMLEnumeratedAttrChecker->({
6130 on => 1, off => 1,
6131 }),
6132 list => $ListAttrChecker,
6133 min => $GetDateTimeAttrChecker->($v->[0]),
6134 max => $GetDateTimeAttrChecker->($v->[0]),
6135 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6136 required => $GetHTMLBooleanAttrChecker->('required'),
6137 step => $StepAttrChecker,
6138 value => $GetDateTimeAttrChecker->($v->[0]),
6139 }->{$attr_ln} || $checker;
6140
6141 ## XXX Maybe it is better to check min <= value <= max
6142 ## relation is hold for convinience?
6143 } elsif ($state eq 'number') {
6144 $checker =
6145 {
6146 autocomplete => $GetHTMLEnumeratedAttrChecker->({
6147 on => 1, off => 1,
6148 }),
6149 list => $ListAttrChecker,
6150 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
6151 min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
6152 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6153 required => $GetHTMLBooleanAttrChecker->('required'),
6154 step => $StepAttrChecker,
6155 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
6156 }->{$attr_ln} || $checker;
6157 } elsif ($state eq 'range') {
6158 $checker =
6159 {
6160 autocomplete => $GetHTMLEnumeratedAttrChecker->({
6161 on => 1, off => 1,
6162 }),
6163 list => $ListAttrChecker,
6164 max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
6165 min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
6166 step => $StepAttrChecker,
6167 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
6168 }->{$attr_ln} || $checker;
6169 } elsif ($state eq 'color') {
6170 $checker =
6171 {
6172 autocomplete => $GetHTMLEnumeratedAttrChecker->({
6173 on => 1, off => 1,
6174 }),
6175 list => $ListAttrChecker,
6176 value => sub {
6177 my ($self, $attr) = @_;
6178 unless ($attr->value =~ /\A#[0-9A-Fa-f]{6}\z/) {
6179 $self->{onerror}->(node => $attr,
6180 type => 'scolor:syntax error', ## TODOC: type
6181 level => $self->{level}->{must});
6182 }
6183 },
6184 }->{$attr_ln} || $checker;
6185 } elsif ($state eq 'checkbox' or $state eq 'radio') {
6186 $checker =
6187 {
6188 checked => $GetHTMLBooleanAttrChecker->('checked'),
6189 ## TODO: tests
6190 required => $GetHTMLBooleanAttrChecker->('required'),
6191 value => sub { }, ## NOTE: No restriction.
6192 }->{$attr_ln} || $checker;
6193 ## TODO: There MUST be another input type=radio with same
6194 ## name (Radio state).
6195 ## ISSUE: There should be exactly one type=radio with checked?
6196 } elsif ($state eq 'file') {
6197 $checker =
6198 {
6199 accept => $AcceptAttrChecker,
6200 ## max (default 1) & min (default 0) [WF2]: Dropped by HTML5.
6201 multiple => $GetHTMLBooleanAttrChecker->('multiple'),
6202 required => $GetHTMLBooleanAttrChecker->('required'),
6203 }->{$attr_ln} || $checker;
6204 } elsif ($state eq 'submit') {
6205 $checker =
6206 {
6207 action => $HTMLURIAttrChecker,
6208 enctype => $GetHTMLEnumeratedAttrChecker->({
6209 'application/x-www-form-urlencoded' => 1,
6210 'multipart/form-data' => 1,
6211 'text/plain' => 1,
6212 }),
6213 formaction => $HTMLURIAttrChecker,
6214 formenctype => $GetHTMLEnumeratedAttrChecker->({
6215 'application/x-www-form-urlencoded' => 1,
6216 'multipart/form-data' => 1,
6217 'text/plain' => 1,
6218 }),
6219 formmethod => $GetHTMLEnumeratedAttrChecker->({
6220 get => 1, post => 1, put => 1, delete => 1,
6221 }),
6222 formnovalidate => $GetHTMLBooleanAttrChecker->('formnovalidate'),
6223 formtarget => $HTMLTargetAttrChecker,
6224 method => $GetHTMLEnumeratedAttrChecker->({
6225 get => 1, post => 1, put => 1, delete => 1,
6226 }),
6227 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
6228 replace => $GetHTMLEnumeratedAttrChecker->({
6229 document => 1, values => 1,
6230 }),
6231 target => $HTMLTargetAttrChecker,
6232 value => sub { }, ## NOTE: No restriction.
6233 }->{$attr_ln} || $checker;
6234 } elsif ($state eq 'image') {
6235 $checker =
6236 {
6237 action => $HTMLURIAttrChecker,
6238 align => $GetHTMLEnumeratedAttrChecker->({
6239 top => 1, middle => 1, bottom => 1, left => 1, right => 1,
6240 }),
6241 alt => sub {
6242 my ($self, $attr) = @_;
6243 my $value = $attr->value;
6244 unless (length $value) {
6245 $self->{onerror}->(node => $attr,
6246 type => 'empty anchor image alt',
6247 level => $self->{level}->{must});
6248 }
6249 },
6250 enctype => $GetHTMLEnumeratedAttrChecker->({
6251 'application/x-www-form-urlencoded' => 1,
6252 'multipart/form-data' => 1,
6253 'text/plain' => 1,
6254 }),
6255 formaction => $HTMLURIAttrChecker,
6256 formenctype => $GetHTMLEnumeratedAttrChecker->({
6257 'application/x-www-form-urlencoded' => 1,
6258 'multipart/form-data' => 1,
6259 'text/plain' => 1,
6260 }),
6261 formmethod => $GetHTMLEnumeratedAttrChecker->({
6262 get => 1, post => 1, put => 1, delete => 1,
6263 }),
6264 formnovalidate => $GetHTMLBooleanAttrChecker->('formnovalidate'),
6265 formtarget => $HTMLTargetAttrChecker,
6266 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
6267 ismap => $GetHTMLBooleanAttrChecker->('ismap'),
6268 method => $GetHTMLEnumeratedAttrChecker->({
6269 get => 1, post => 1, put => 1, delete => 1,
6270 }),
6271 novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
6272 replace => $GetHTMLEnumeratedAttrChecker->({
6273 document => 1, values => 1,
6274 }),
6275 src => $HTMLURIAttrChecker,
6276 ## TODO: There is requirements on the referenced resource.
6277 target => $HTMLTargetAttrChecker,
6278 usemap => $HTMLUsemapAttrChecker,
6279 width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
6280 }->{$attr_ln} || $checker;
6281 ## TODO: alt & src are required.
6282 } elsif ({
6283 reset => 1, button => 1,
6284 ## NOTE: From Web Forms 2.0:
6285 remove => 1, 'move-up' => 1, 'move-down' => 1,
6286 add => 1,
6287 }->{$state}) {
6288 $checker =
6289 {
6290 ## NOTE: According to Web Forms 2.0, |input| attribute
6291 ## has |template| attribute to support the |add| button
6292 ## type (as part of the repetition template feature). It
6293 ## conflicts with the |template| global attribute
6294 ## introduced as part of the data template feature.
6295 ## NOTE: |template| attribute as defined in Web Forms 2.0
6296 ## has no author requirement.
6297 value => sub { }, ## NOTE: No restriction.
6298 }->{$attr_ln} || $checker;
6299 } else { # Text, Search, E-mail, URL, Telephone, Password
6300 $checker =
6301 {
6302 autocomplete => $GetHTMLEnumeratedAttrChecker->({
6303 on => 1, off => 1,
6304 }),
6305 ## TODO: inputmode [WF2]
6306 list => $ListAttrChecker,
6307 maxlength => sub {
6308 my ($self, $attr, $item, $element_state) = @_;
6309
6310 $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })->(@_);
6311
6312 if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
6313 ## NOTE: Applying the rules for parsing non-negative
6314 ## integers results in a number.
6315 my $max_allowed_value_length = 0+$1;
6316
6317 my $value = $item->{node}->get_attribute_ns (undef, 'value');
6318 if (defined $value) {
6319 my $codepoint_length = length $value;
6320
6321 if ($codepoint_length > $max_allowed_value_length) {
6322 $self->{onerror}
6323 ->(node => $item->{node}
6324 ->get_attribute_node_ns (undef, 'value'),
6325 type => 'value too long',
6326 level => $self->{level}->{must});
6327 }
6328 }
6329 }
6330 },
6331 pattern => $PatternAttrChecker,
6332 placeholder => $PlaceholderAttrChecker,
6333 readonly => $GetHTMLBooleanAttrChecker->('readonly'),
6334 required => $GetHTMLBooleanAttrChecker->('required'),
6335 size => $GetHTMLNonNegativeIntegerAttrChecker->(sub {shift > 0}),
value => sub {
  ## Checker for the |value| attribute in the text-like |input|
  ## states.  What is validated depends on the closed-over |$state|:
  ##   url   - the value is handed to |$HTMLURIAttrChecker|;
  ##   email - one address, or a comma-separated list of addresses
  ##           when the |multiple| attribute is present;
  ##   other - the value must not contain CR or LF.
  ## Arguments: the checker object, the attribute node, the item of
  ## the owner element, and the element state (unused here).
  my ($self, $attr, $item, $element_state) = @_;
  if ($state eq 'url') {
    ## XXX MUST be absolute IRI.
    $HTMLURIAttrChecker->(@_);
  } elsif ($state eq 'email') {
    if ($item->{node}->has_attribute_ns (undef, 'multiple')) {
      ## A set of comma-separated tokens.  The limit of -1 keeps
      ## trailing empty fields, so a token after a trailing comma is
      ## still checked.
      my @addr = split /,/, $attr->value, -1;
      @addr = ('') unless @addr;
      for (@addr) {
        ## Strip all leading and all trailing space characters from
        ## the token before matching it.
        s/\A[\x09\x0A\x0C\x0D\x20]+//;
        s/[\x09\x0A\x0C\x0D\x20]+\z//;

        unless (/\A$ValidEmailAddress\z/o) {
          $self->{onerror}->(node => $attr,
                             type => 'email:syntax error', ## TODO: type
                             value => $_,
                             level => $self->{level}->{must});
        }
      }
    } else {
      ## Single address: the whole attribute value must match.
      unless ($attr->value =~ /\A$ValidEmailAddress\z/) {
        $self->{onerror}->(node => $attr,
                           type => 'email:syntax error', ## TODO: type
                           level => $self->{level}->{must});
      }
    }
  } else {
    ## Remaining states handled by this branch of the caller.
    if ($attr->value =~ /[\x0D\x0A]/) {
      $self->{onerror}->(node => $attr,
                         type => 'newline in value', ## TODO: type
                         level => $self->{level}->{must});
    }
  }
},
6372 }->{$attr_ln} || $checker;
6373 $checker = '' if $state eq 'password' and $attr_ln eq 'list';
6374 $checker = $GetHTMLBooleanAttrChecker->('multiple')
6375 if $state eq 'email' and $attr_ln eq 'multiple';
6376
6377 if ($item->{node}->has_attribute_ns (undef, 'pattern') and
6378 not $item->{node}->has_attribute_ns (undef, 'title')) {
6379 $self->{onerror}->(node => $item->{node},
6380 type => 'attribute missing',
6381 text => 'title',
6382 level => $self->{level}->{should});
6383 }
6384 }
6385 }
6386
6387 if (defined $checker) {
6388 if ($checker eq '') {
6389 $checker = sub {
6390 my ($self, $attr) = @_;
6391 $self->{onerror}->(node => $attr,
6392 type => 'input attr not applicable',
6393 text => $state,
6394 level => $self->{level}->{must});
6395 };
6396 }
6397 } elsif ($attr_ln =~ /^data-\p{InXMLNCNameChar10}+\z/ and
6398 $attr_ln !~ /[A-Z]/) {
6399 $checker = $HTMLDatasetAttrChecker;
6400 $status = $HTMLDatasetAttrStatus;
6401 } else {
6402 $checker = $HTMLAttrChecker->{$attr_ln};
6403 }
6404 }
6405 $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
6406 || $AttrChecker->{$attr_ns}->{''};
6407 $status ||= $AttrStatus->{$attr_ns}->{$attr_ln}
6408 || $AttrStatus->{$attr_ns}->{''};
6409 $status = FEATURE_ALLOWED if not defined $status and length $attr_ns;
6410
6411 if ($checker) {
6412 $checker->($self, $attr, $item, $element_state) if ref $checker;
6413 } elsif ($attr_ns eq '' and not $status) {
6414 #
6415 } else {
6416 $self->{onerror}->(node => $attr,
6417 type => 'unknown attribute',
6418 level => $self->{level}->{uncertain});
6419 ## ISSUE: No comformance createria for unknown attributes in the spec
6420 }
6421
6422 $self->_attr_status_info ($attr, $status);
6423 }
6424
6425 ## ISSUE: -0/+0
6426
6427 if ($state eq 'range') {
6428 $element_state->{number_value}->{min} ||= 0;
6429 $element_state->{number_value}->{max} = 100
6430 unless defined $element_state->{number_value}->{max};
6431 }
6432
6433 if (defined $element_state->{date_value}->{min} or
6434 defined $element_state->{date_value}->{max}) {
6435 my $min_value = $element_state->{date_value}->{min};
6436 my $max_value = $element_state->{date_value}->{max};
6437 my $value_value = $element_state->{date_value}->{value};
6438
6439 if (defined $min_value and $min_value eq '' and
6440 (defined $max_value or defined $value_value)) {
6441 my $min = $item->{node}->get_attribute_node_ns (undef, 'min');
6442 $self->{onerror}->(node => $min,
6443 type => 'date value not supported', ## TODOC: type
6444 value => $min->value,
6445 level => $self->{level}->{unsupported});
6446 undef $min_value;
6447 }
## An empty-string |max| is reported as an unsupported date value,
## but only when there is something to compare it with: a |min| that
## is still defined at this point, or a |value|.  This mirrors the
## checks for |min| and |value| around it.
## NOTE(review): the empty string presumably marks a date that could
## not be interpreted - confirm against the |max| attribute checker.
if (defined $max_value and $max_value eq '' and
    (defined $min_value or defined $value_value)) {
  my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
  $self->{onerror}->(node => $max,
                     type => 'date value not supported', ## TODOC: type
                     value => $max->value,
                     level => $self->{level}->{unsupported});
  ## Exclude |max| from the range comparisons that follow.
  undef $max_value;
}
6457 if (defined $value_value and $value_value eq '' and
6458 (defined $max_value or defined $min_value)) {
6459 my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6460 $self->{onerror}->(node => $value,
6461 type => 'date value not supported', ## TODOC: type
6462 value => $value->value,
6463 level => $self->{level}->{unsupported});
6464 undef $value_value;
6465 }
6466
6467 if (defined $min_value and defined $max_value) {
6468 if ($min_value->to_html5_number > $max_value->to_html5_number) {
6469 my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6470 $self->{onerror}->(node => $max,
6471 type => 'max lt min', ## TODOC: type
6472 level => $self->{level}->{must});
6473 }
6474 }
6475
6476 if (defined $min_value and defined $value_value) {
6477 if ($min_value->to_html5_number > $value_value->to_html5_number) {
6478 my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6479 $self->{onerror}->(node => $value,
6480 type => 'value lt min', ## TODOC: type
6481 level => $self->{level}->{warn});
6482 ## NOTE: Not an error.
6483 }
6484 }
6485
6486 if (defined $max_value and defined $value_value) {
6487 if ($max_value->to_html5_number < $value_value->to_html5_number) {
6488 my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6489 $self->{onerror}->(node => $value,
6490 type => 'value gt max', ## TODOC: type
6491 level => $self->{level}->{warn});
6492 ## NOTE: Not an error.
6493 }
6494 }
6495 } elsif (defined $element_state->{number_value}->{min} or
6496 defined $element_state->{number_value}->{max}) {
6497 my $min_value = $element_state->{number_value}->{min};
6498 my $max_value = $element_state->{number_value}->{max};
6499 my $value_value = $element_state->{number_value}->{value};
6500
6501 if (defined $min_value and defined $max_value) {
6502 if ($min_value > $max_value) {
6503 my $max = $item->{node}->get_attribute_node_ns (undef, 'max');
6504 $self->{onerror}->(node => $max,
6505 type => 'max lt min', ## TODOC: type
6506 level => $self->{level}->{must});
6507 }
6508 }
6509
6510 if (defined $min_value and defined $value_value) {
6511 if ($min_value > $value_value) {
6512 my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6513 $self->{onerror}->(node => $value,
6514 type => 'value lt min', ## TODOC: type
6515 level => $self->{level}->{warn});
6516 ## NOTE: Not an error.
6517 }
6518 }
6519
6520 if (defined $max_value and defined $value_value) {
6521 if ($max_value < $value_value) {
6522 my $value = $item->{node}->get_attribute_node_ns (undef, 'value');
6523 $self->{onerror}->(node => $value,
6524 type => 'value gt max', ## TODOC: type
6525 level => $self->{level}->{warn});
6526 ## NOTE: Not an error.
6527 }
6528 }
6529 }
6530
6531 ## TODO: Warn unless value = min * x where x is an integer.
6532
6533 $element_state->{uri_info}->{action}->{type}->{action} = 1;
6534 $element_state->{uri_info}->{action}->{type}->{formaction} = 1;
6535 $element_state->{uri_info}->{datasrc}->{type}->{resource} = 1;
6536 $element_state->{uri_info}->{src}->{type}->{embedded} = 1;
6537 $element_state->{uri_info}->{template}->{type}->{resource} = 1;
6538 $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
6539 }, # check_attrs
check_start => sub {
  ## Start hook of the |input| checker: forwards the checker object,
  ## the item and the element state unchanged to |$FAECheckStart|.
  my ($checker, $input_item, $input_state) = @_;
  return $FAECheckStart->($checker, $input_item, $input_state);
}, # check_start
check_attrs2 => sub {
  ## Second attribute hook of the |input| checker: forwards the
  ## checker object, the item and the element state unchanged to
  ## |$FAECheckAttrs2|.
  my ($checker, $input_item, $input_state) = @_;
  return $FAECheckAttrs2->($checker, $input_item, $input_state);
}, # check_attrs2
6548 }; # input
6549
6550 ## XXXresource: Dimension attributes have requirements on width and
6551 ## height of referenced resource.
6552
$Element->{$HTML_NS}->{button} = do {
  ## Checker definition for the HTML |button| element.  It starts from
  ## |%HTMLPhrasingContentChecker| and overrides the status, the
  ## attribute checkers and the start / attrs2 / end hooks.

  ## Keyword sets shared by the submission-related attributes.  Every
  ## enumerated checker below is given its own copy of the set.
  my %encoding_types = (
    'application/x-www-form-urlencoded' => 1,
    'multipart/form-data' => 1,
    'text/plain' => 1,
  );
  my %submission_methods = (get => 1, post => 1, put => 1, delete => 1);

  +{
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({
      ## XXXISSUE: In HTML5, no "MUST NOT" for using |action|, |method|,
      ## |enctype|, |target|, and |novalidate| with non-|submit|-|type|
      ## |button| elements.
      action => $HTMLURIAttrChecker,
      autofocus => $AutofocusAttrChecker,
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      enctype => $GetHTMLEnumeratedAttrChecker->({ %encoding_types }),
      form => $HTMLFormAttrChecker,
      formaction => $HTMLURIAttrChecker,
      formenctype => $GetHTMLEnumeratedAttrChecker->({ %encoding_types }),
      formmethod => $GetHTMLEnumeratedAttrChecker->({ %submission_methods }),
      formnovalidate => $GetHTMLBooleanAttrChecker->('formnovalidate'),
      formtarget => $HTMLTargetAttrChecker,
      method => $GetHTMLEnumeratedAttrChecker->({ %submission_methods }),
      name => $FormControlNameAttrChecker,
      novalidate => $GetHTMLBooleanAttrChecker->('novalidate'),
      onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
      onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
      replace => $GetHTMLEnumeratedAttrChecker->({document => 1, values => 1}),
      target => $HTMLTargetAttrChecker,
      ## NOTE: According to Web Forms 2.0, the |button| element has a
      ## |template| attribute to support the |add| button type (as part
      ## of the repetition template feature).  It conflicts with the
      ## |template| global attribute introduced as part of the data
      ## template feature.
      ## NOTE: The |template| attribute as defined in Web Forms 2.0 has
      ## no author requirement.
      type => $GetHTMLEnumeratedAttrChecker->({
        button => 1, submit => 1, reset => 1,
      }),
      value => sub {}, ## NOTE: No restriction.
    }, {
      ## The two shared status tables come first so that the explicit
      ## entries after them take precedence.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC | FEATURE_HTML5_FD,
      action => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
      autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
      enctype => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
      form => FEATURE_HTML5_LC | FEATURE_WF2X,
      formaction => FEATURE_HTML5_LC,
      formenctype => FEATURE_HTML5_LC,
      formmethod => FEATURE_HTML5_LC,
      formnovalidate => FEATURE_HTML5_LC,
      formtarget => FEATURE_HTML5_LC,
      lang => FEATURE_HTML5_REC,
      method => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
      name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      novalidate => FEATURE_HTML5_DROPPED,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onformchange => FEATURE_WF2_INFORMATIVE,
      onforminput => FEATURE_WF2_INFORMATIVE,
      replace => FEATURE_WF2,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      target => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
      template => FEATURE_HTML5_DROPPED | FEATURE_WF2,
      type => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      value => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    }), # check_attrs
    check_start => sub {
      ## Registers |$HTMLInteractiveContent| through
      ## |_add_minus_elements|, runs |$FAECheckStart|, and records how
      ## the URI-valued attributes of this element are classified.
      my ($self, $item, $element_state) = @_;
      $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
      $FAECheckStart->($self, $item, $element_state);

      ## XXXISSUE: "The value attribute must not be present unless the
      ## form [content] attribute is present.": Wrong?  Maybe it should
      ## also be allowed when there is an ancestor |form| element.

      ## Each pair is (attribute name, URI type flag).
      for my $entry ([action => 'action'], [formaction => 'action'],
                     [datasrc => 'resource'], [template => 'resource'],
                     [ref => 'resource']) {
        my ($attr_name, $uri_type) = @$entry;
        $element_state->{uri_info}->{$attr_name}->{type}->{$uri_type} = 1;
      }
    }, # check_start
    check_attrs2 => sub {
      ## Forwards its three arguments to |$FAECheckAttrs2|.
      my ($checker, $button_item, $button_state) = @_;
      return $FAECheckAttrs2->($checker, $button_item, $button_state);
    }, # check_attrs2
    check_end => sub {
      ## Undoes the |_add_minus_elements| registration made in
      ## |check_start|, then runs the phrasing-content end check with
      ## the original argument list.
      my ($self, undef, $element_state) = @_;
      $self->_remove_minus_elements ($element_state);

      return $HTMLPhrasingContentChecker{check_end}->(@_);
    }, # check_end
  };
}; # button
6657
$Element->{$HTML_NS}->{label} = {
  ## Checker definition for the HTML |label| element, built on top of
  ## |%HTMLPhrasingContentChecker|.
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    for => sub {
      my ($self, $for_attr) = @_;

      ## NOTE: MUST be an ID of a labelable element.  Here the
      ## reference is only queued on |$self->{idref}|.
      my $reference = ['labelable', $for_attr->value, $for_attr];
      push @{$self->{idref}}, $reference;
    },
    form => $HTMLFormAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NXHTML2CommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_WF2 | FEATURE_M12N10_REC,
    for => FEATURE_HTML5_REC,
    form => FEATURE_HTML5_LC,
    lang => FEATURE_HTML5_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {label => 1}});

    ## If $self->{flag}->{has_label} is true, then there is at least
    ## one ancestor |label| element.

    ## If $self->{flag}->{has_labelable} is equal to 1, then there is
    ## an ancestor |label| element with its |for| attribute specified.
    ## If the value is equal to 2, then there is an ancestor |label|
    ## element with its |for| attribute unspecified but there is an
    ## associated form control element.

    ## Keep the current flags under |*_original| keys of the element
    ## state; |check_end| reads them back.
    for my $flag_name (qw(has_label has_labelable label_for)) {
      $element_state->{$flag_name . '_original'}
          = $self->{flag}->{$flag_name};
    }

    ## Install the flags describing this |label| element.
    my $label_el = $item->{node};
    $self->{flag}->{has_label} = 1;
    if ($label_el->has_attribute_ns (undef, 'for')) {
      $self->{flag}->{has_labelable} = 1;
    } else {
      $self->{flag}->{has_labelable} = 0;
    }
    $self->{flag}->{label_for} = $label_el->get_attribute_ns (undef, 'for');

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(template ref);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $saved_has_labelable = $element_state->{has_labelable_original};
    my $saved_has_label = $element_state->{has_label_original};
    my $saved_label_for = $element_state->{label_for_original};

    ## |has_labelable| is put back only when it is still 1, i.e. the
    ## element has for="" but no labelable.
    if ($self->{flag}->{has_labelable} == 1) {
      $self->{flag}->{has_labelable} = $saved_has_labelable;
    }
    if (not $saved_has_label) {
      delete $self->{flag}->{has_label};
    }
    $self->{flag}->{label_for} = $saved_label_for;

    ## TODO: Warn if no labelable descendant?  <input type=hidden>?

    ## NOTE: |<label for=a><input id=a></label>| is non-conforming.

    return $HTMLPhrasingContentChecker{check_end}->(@_);
  },
}; # label
6725
$Element->{$HTML_NS}->{select} = {
  ## Checker definition for the HTML |select| element, built on top
  ## of |%HTMLChecker|.  Allowed children: (option | optgroup)*.
  %HTMLChecker,
  ## ISSUE: HTML5 has no requirement like these:
  ## TODO: An author should mark at least one OPTION as SELECTED in
  ## the non-MULTIPLE case [HTML4].
  ## TODO: More than one OPTION with SELECTED in the non-MULTIPLE
  ## case is an "error" [HTML4].
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  is_root => 1, ## TODO: SHOULD NOT in application/xhtml+xml [WF2]
  check_attrs => $GetHTMLAttrsChecker->({
    autofocus => $AutofocusAttrChecker,
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
    form => $HTMLFormAttrChecker,
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
    name => $FormControlNameAttrChecker,
    ## TODO: tests for on*
    (map { ($_ => $HTMLEventHandlerAttrChecker) }
         qw(onformchange onforminput oninput oninvalid)),
    ## The callback accepts values greater than zero only.
    size => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    accesskey => FEATURE_HTML5_FD | FEATURE_WF2,
    autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
    data => FEATURE_WF2,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    form => FEATURE_HTML5_LC | FEATURE_WF2X,
    lang => FEATURE_HTML5_REC,
    multiple => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onformchange => FEATURE_WF2_INFORMATIVE,
    onforminput => FEATURE_WF2_INFORMATIVE,
    onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    oninput => FEATURE_WF2,
    oninvalid => FEATURE_WF2,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    size => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    ## Runs |$FAECheckStart| and classifies the URI-valued attributes
    ## |data|, |datasrc|, |template| and |ref| as resources.
    my ($self, $item, $element_state) = @_;
    $FAECheckStart->($self, $item, $element_state);

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(data datasrc template ref);
  }, # check_start
  check_attrs2 => sub {
    ## Forwards its three arguments to |$FAECheckAttrs2|.
    my ($checker, $select_item, $select_state) = @_;
    return $FAECheckAttrs2->($checker, $select_item, $select_state);
  }, # check_attrs2
  check_child_element => sub {
    ## NOTE: (option | optgroup)*

    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Decide which error, if any, applies to this child; the tests
    ## are made in the same order for every child.
    my $error_type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      return;
    } elsif ($child_nsuri eq $HTML_NS and
             ($child_ln eq 'option' or $child_ln eq 'optgroup')) {
      return;
    } else {
      $error_type = 'element not allowed';
    }

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must});
  },
  check_child_text => sub {
    ## Text flagged as significant by the caller is not allowed here.
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
6815
$Element->{$HTML_NS}->{datalist} = {
  ## Checker definition for the HTML |datalist| element, built on top
  ## of |%HTMLPhrasingContentChecker|.  The content model is tracked
  ## in |$element_state->{phase}|, which starts as |any| and is
  ## switched to |phrasing| or |option| by the first accepted child.
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    data => $HTMLURIAttrChecker, ## TODO: MUST point ... [WF2]
  }, {
    %HTMLAttrStatus,
    data => FEATURE_WF2,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $element_state->{id_type} = 'datalist';
    $element_state->{phase} = 'any'; # any | phrasing | option

    $element_state->{uri_info}->{$_}->{type}->{resource} = 1
        for qw(data template ref);
  },
  ## NOTE: phrasing | option*
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Reports a must-level error of the given type on the child.
    my $reject = sub {
      my ($error_type) = @_;
      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{level}->{must});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $reject->('element not allowed:minus');
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    my $phase = $element_state->{phase};
    if ($phase eq 'phrasing') {
      ## Phrasing mode: only phrasing content may follow.
      $reject->('element not allowed:phrasing')
          unless $HTMLPhrasingContent->{$child_nsuri}->{$child_ln};
    } elsif ($phase eq 'option') {
      ## Option mode: only HTML |option| elements may follow.
      $reject->('element not allowed')
          unless ($child_nsuri eq $HTML_NS and $child_ln eq 'option');
    } elsif ($phase eq 'any') {
      ## Undecided: the first accepted child selects the mode.
      if ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{phase} = 'phrasing';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'option') {
        $element_state->{phase} = 'option';
      } else {
        $reject->('element not allowed');
      }
    } else {
      die "check_child_element: Bad |datalist| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    ## Significant text is fine in phrasing mode and selects phrasing
    ## mode when undecided; in any other phase it is an error.
    my $phase = $element_state->{phase};
    return if $phase eq 'phrasing';
    if ($phase eq 'any') {
      $element_state->{phase} = 'phrasing';
      return;
    }

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed',
                       level => $self->{level}->{must});
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    unless ($element_state->{phase} eq 'phrasing') {
      ## NOTE: Since the content model explicitly allows a |datalist|
      ## element being empty, we don't raise a "no significant
      ## content" error for this element when there is no element.
      ## (We should raise an error for |<datalist><br></datalist>|,
      ## however.)
      ## NOTE: As a side effect, when the |datalist| element only
      ## contains non-conforming content, the |phase| flag has not
      ## changed from |any| and no "no significant content" error is
      ## raised either.
      return $HTMLChecker{check_end}->(@_);
    }

    ## Phrasing mode: pass the "has significant content" flag up, or
    ## complain unless the item is marked transparent.
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif (not $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'no significant content',
                         level => $self->{level}->{should});
    }
  },
};
6915
$Element->{$HTML_NS}->{optgroup} = {
  ## Checker definition for the HTML |optgroup| element, built on top
  ## of |%HTMLChecker|.  Only HTML |option| children are accepted and
  ## the |label| attribute is required.
  %HTMLChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub {},
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    label => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
  }),
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    my $optgroup_el = $item->{node};
    return if $optgroup_el->has_attribute_ns (undef, 'label');

    ## The |label| attribute is missing.
    $self->{onerror}->(node => $optgroup_el,
                       type => 'attribute missing',
                       text => 'label',
                       level => $self->{level}->{must});
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    my $must = $self->{level}->{must};

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $must);
      return;
    }

    ## Accepted children: anything listed in |plus_elements| and the
    ## HTML |option| element.
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};
    return if $child_nsuri eq $HTML_NS and $child_ln eq 'option';

    $self->{onerror}->(node => $child_el, type => 'element not allowed',
                       level => $must);
  },
  check_child_text => sub {
    ## Text flagged as significant by the caller is not allowed here.
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if (not $has_significant) {
      return;
    }

    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must});
  },
};
6964
$Element->{$HTML_NS}->{option} = do {
  ## Checker definition for the HTML |option| element, built on top
  ## of |%HTMLTextChecker|.  Only the status and the attribute tables
  ## are overridden.

  ## Attribute name => checker.
  my %attr_checker = (
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    label => sub {}, ## NOTE: No restriction.
    selected => $GetHTMLBooleanAttrChecker->('selected'), ## ISSUE: Not a "boolean attribute"
    value => sub {}, ## NOTE: No restriction.
  );

  ## Attribute name => feature status; the shared tables come first
  ## so that the explicit entries take precedence.
  my %attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    label => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    selected => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
    value => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  );

  +{
    %HTMLTextChecker,
    status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%attr_checker, \%attr_status),
  };
};
6985
$Element->{$HTML_NS}->{textarea} = do {
  ## Checker definition for the HTML |textarea| element, built on top
  ## of |%HTMLTextChecker|.

  ## Syntax checker for |maxlength|; its callback accepts every
  ## parsed number.  It is built once here and reused by the
  ## |maxlength| closure below.
  my $check_maxlength_syntax
      = $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 });

  +{
    %HTMLTextChecker,
    status => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({
      accept => $HTMLIMTAttrChecker, ## TODO: MUST be a text-based type [WF2]
      autofocus => $AutofocusAttrChecker,
      cols => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      form => $HTMLFormAttrChecker,
      ## TODO: inputmode [WF2]
      maxlength => sub {
        ## Checks the syntax of |maxlength| and then compares the
        ## length of the element's text content with the limit.
        ## Arguments: the checker object, the attribute node, the
        ## item of the owner element, and the element state.
        my ($self, $attr, $item, $element_state) = @_;

        $check_maxlength_syntax->(@_);

        if ($attr->value =~ /^[\x09\x0A\x0C\x0D\x20]*([0-9]+)/) {
          ## NOTE: Applying the rules for parsing non-negative integers
          ## results in a number.
          my $max_allowed_value_length = 0+$1;

          ## ISSUE: "The the purposes of this requirement," (typo)

          ## ISSUE: This constraint is applied w/o CRLF normalization to
          ## |value| attribute, but w/ CRLF normalization to
          ## concept-value.
          my $value = $item->{node}->text_content;
          if (defined $value) {
            my $codepoint_length = length $value;

            if ($codepoint_length > $max_allowed_value_length) {
              $self->{onerror}->(node => $item->{node},
                                 type => 'value too long',
                                 level => $self->{level}->{must});
            }
          }
        }
      },
      name => $FormControlNameAttrChecker,
      onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
      onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
      oninput => $HTMLEventHandlerAttrChecker, ## TODO: tests
      oninvalid => $HTMLEventHandlerAttrChecker, ## TODO: tests
      pattern => $PatternAttrChecker,
      placeholder => $PlaceholderAttrChecker,
      readonly => $GetHTMLBooleanAttrChecker->('readonly'),
      required => $GetHTMLBooleanAttrChecker->('required'),
      rows => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
      ## NOTE: |title| had special semantics if |pattern| was specified [WF2].
      wrap => $GetHTMLEnumeratedAttrChecker->({soft => 1, hard => 1}),
    }, {
      ## The two shared status tables come first so that the explicit
      ## entries after them take precedence.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accept => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
      'accept-charset' => FEATURE_HTML2X_RFC,
      accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
      autofocus => FEATURE_HTML5_LC | FEATURE_WF2X,
      cols => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      disabled => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
      form => FEATURE_HTML5_LC | FEATURE_WF2X,
      inputmode => FEATURE_HTML5_DROPPED | FEATURE_WF2X | FEATURE_XHTMLBASIC11_CR,
      lang => FEATURE_HTML5_REC,
      maxlength => FEATURE_HTML5_DEFAULT | FEATURE_WF2X,
      name => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onchange => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onformchange => FEATURE_WF2_INFORMATIVE, ## TODO: tests
      onforminput => FEATURE_WF2_INFORMATIVE, ## TODO: tests
      oninput => FEATURE_WF2, ## TODO: tests
      oninvalid => FEATURE_WF2, ## TODO: tests
      onselect => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      pattern => FEATURE_HTML5_DROPPED | FEATURE_WF2X,
      placeholder => FEATURE_HTML5_LC,
      readonly => FEATURE_HTML5_LC | FEATURE_WF2X | FEATURE_M12N10_REC,
      required => FEATURE_HTML5_LC | FEATURE_WF2X,
      rows => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      sdaform => FEATURE_HTML20_RFC,
      sdapref => FEATURE_HTML20_RFC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      wrap => FEATURE_HTML5_LC | FEATURE_WF2X,
    }),
    check_start => sub {
      ## Runs |$FAECheckStart| and classifies the URI-valued
      ## attributes |data|, |template| and |ref| as resources.
      my ($self, $item, $element_state) = @_;
      $FAECheckStart->($self, $item, $element_state);

      $element_state->{uri_info}->{data}->{type}->{resource} = 1;
      $element_state->{uri_info}->{template}->{type}->{resource} = 1;
      $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    },
    check_attrs2 => sub {
      ## Checks that depend on combinations of attributes, followed by
      ## |$FAECheckAttrs2|.
      my ($self, $item, $element_state) = @_;

      if ($item->{node}->has_attribute_ns (undef, 'pattern') and
          not $item->{node}->has_attribute_ns (undef, 'title')) {
        ## NOTE: WF2 (dropped by HTML5)
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing',
                           text => 'title',
                           level => $self->{level}->{should});
      }

      ## |wrap| in the |hard| state requires the |cols| attribute.
      unless ($item->{node}->has_attribute_ns (undef, 'cols')) {
        my $wrap = $item->{node}->get_attribute_ns (undef, 'wrap');
        if (defined $wrap) {
          $wrap =~ tr/A-Z/a-z/; ## ASCII case-insensitive
          if ($wrap eq 'hard') {
            $self->{onerror}->(node => $item->{node},
                               type => 'attribute missing',
                               text => 'cols',
                               level => $self->{level}->{must});
          }
        }
      }

      $FAECheckAttrs2->($self, $item, $element_state);
    }, # check_attrs2
  };
}; # textarea
7107
$Element->{$HTML_NS}->{keygen} = {
  ## Checker definition for the HTML |keygen| element, built on top
  ## of |%HTMLEmptyChecker|.
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_FD,
  check_attrs => $GetHTMLAttrsChecker->({
    autofocus => $AutofocusAttrChecker,
    challenge => sub { }, ## No constraints.
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    form => $HTMLFormAttrChecker,
    keytype => $GetHTMLEnumeratedAttrChecker->({rsa => 1}),
    name => $FormControlNameAttrChecker,
  }, {
    %HTMLAttrStatus,
    autofocus => FEATURE_HTML5_LC,
    challenge => FEATURE_HTML5_FD,
    disabled => FEATURE_HTML5_LC,
    form => FEATURE_HTML5_LC,
    keytype => FEATURE_HTML5_FD,
    name => FEATURE_HTML5_LC,
  }), # check_attrs
  check_start => sub {
    ## Runs |$FAECheckStart| and classifies the URI-valued attributes
    ## |template| and |ref| as resources.
    my ($self, $item, $element_state) = @_;
    $FAECheckStart->($self, $item, $element_state);

    for my $attr_name (qw(template ref)) {
      $element_state->{uri_info}->{$attr_name}->{type}->{resource} = 1;
    }
  }, # check_start
  check_attrs2 => sub {
    ## Forwards its three arguments to |$FAECheckAttrs2|.
    my ($checker, $keygen_item, $keygen_state) = @_;
    return $FAECheckAttrs2->($checker, $keygen_item, $keygen_state);
  }, # check_attrs2
}; # keygen
7139
$Element->{$HTML_NS}->{output} = {
  ## Checker definition for the HTML |output| element, built on top
  ## of |%HTMLPhrasingContentChecker|.
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_WF2X,
  check_attrs => $GetHTMLAttrsChecker->({
    for => sub {
      my ($self, $attr) = @_;

      ## NOTE: "Unordered set of unique space-separated tokens".

      ## Split on runs of space characters and drop empty fields.
      my @tokens = grep { length $_ }
          split /[\x09\x0A\x0C\x0D\x20]+/, $attr->value;

      ## The first occurrence of a token is queued on
      ## |$self->{idref}|; every repetition is reported instead.
      my %seen;
      for my $token (@tokens) {
        if ($seen{$token}++) {
          $self->{onerror}->(node => $attr, type => 'duplicate token',
                             value => $token,
                             level => $self->{level}->{must});
        } else {
          push @{$self->{idref}}, ['any', $token, $attr];
        }
      }
    },
    form => $HTMLFormAttrChecker,
    name => $FormControlNameAttrChecker,
    onformchange => $HTMLEventHandlerAttrChecker, ## TODO: tests
    onforminput => $HTMLEventHandlerAttrChecker, ## TODO: tests
  }, {
    %HTMLAttrStatus,
    for => FEATURE_HTML5_LC | FEATURE_WF2X,
    form => FEATURE_HTML5_LC | FEATURE_WF2X,
    name => FEATURE_HTML5_LC | FEATURE_WF2X,
    onchange => FEATURE_HTML5_DEFAULT | FEATURE_WF2,
    onformchange => FEATURE_WF2,
    onforminput => FEATURE_WF2,
  }),
};
7176
## |isindex|: built on the empty-element checker; its status marks it
## as deprecated ([HTML4]).
$Element->{$HTML_NS}->{isindex} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED |
      Whatpm::ContentChecker::FEATURE_DEPRECATED_SHOULD, ## [HTML4]
  check_attrs => $GetHTMLAttrsChecker->(
    {
      prompt => sub {}, ## NOTE: Text [M12N]
    },
    {
      ## Attribute name => feature status.
      %HTMLAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_REC,
      id => FEATURE_HTML5_REC,
      lang => FEATURE_HTML5_REC,
      prompt => FEATURE_M12N10_REC_DEPRECATED,
      sdapref => FEATURE_HTML20_RFC,
      style => FEATURE_HTML5_REC,
      title => FEATURE_HTML5_REC,
    },
  ),
  ## TODO: Tests
  ## TODO: Tests for <nest/> in <isindex>
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## Set the URI type flags: |action| for the |action| entry,
    ## |resource| for the |template| and |ref| entries.
    $state->{uri_info}{action}{type}{action} = 1;
    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
}; # isindex
7204
## |script|
##
## Callbacks:
##   check_attrs         - per-attribute value checks.
##   check_attrs2        - cross-attribute constraints (|defer| / |async|
##                         require |src|).
##   check_start         - decides whether the element must be empty
##                         (|src| present) or, otherwise, determines the
##                         script type used for the inline content.
##   check_child_element - rejects child elements when the element must
##                         be empty.
##   check_child_text    - rejects significant text when the element must
##                         be empty; accumulates the text otherwise.
##   check_end           - hands the accumulated inline script to
##                         |$self->{onsubdoc}| (or raises an "uncertain"
##                         error for XML script types).
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    charset => sub {
      my ($self, $attr) = @_;

      ## |charset| is only allowed together with |src|.
      unless ($attr->owner_element->has_attribute_ns (undef, 'src')) {
        $self->{onerror}->(type => 'attribute not allowed',
                           node => $attr,
                           level => $self->{level}->{must});
      }

      ## XXXresource: MUST match the charset of the referenced
      ## resource (HTML5 revision 2967).

      $HTMLCharsetChecker->($attr->value, @_);
    },
    language => sub {}, ## NOTE: No syntax constraint according to HTML4.
    src => $HTMLURIAttrChecker, ## TODO: pointed resource MUST be in type of type="" (resource error)
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker, ## TODO: MUST NOT: |charset=""| parameter
  }, {
    %HTMLAttrStatus,
    async => FEATURE_HTML5_WD,
    charset => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    href => FEATURE_RDFA_REC,
    id => FEATURE_HTML5_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_WD | FEATURE_M12N10_REC,
  }),
  check_attrs2 => sub {
    my ($self, $item, $element_state) = @_;

    ## HTML5: "The defer and async attributes must not be specified
    ## if the src attribute is not present."  A single error is
    ## reported on the element even if both attributes are present.
    my $el = $item->{node};
    if (not $el->has_attribute_ns (undef, 'src') and
        ($el->has_attribute_ns (undef, 'defer') or
         $el->has_attribute_ns (undef, 'async'))) {
      $self->{onerror}->(node => $el,
                         type => 'attribute missing',
                         text => 'src',
                         level => $self->{level}->{must});
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    if ($item->{node}->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.

      ## Determine the script type in the same way as HTML5:
      ##   - |type| present and empty                -> text/javascript
      ##   - |type| present and non-empty            -> value of |type|
      ##   - no |type|, |language| present and empty -> text/javascript
      ##   - no |type|, |language| non-empty         -> "text/" + |language|
      ##   - neither attribute                       -> text/javascript
      ## |language| is consulted only when |type| is absent, such that
      ## an empty |language| never overrides an explicit |type|.
      my $type = $item->{node}->get_attribute_ns (undef, 'type');
      my $language = $item->{node}->get_attribute_ns (undef, 'language');
      if (defined $type) {
        $type = 'text/javascript' if $type eq '';
      } elsif (defined $language) {
        $type = $language eq '' ? 'text/javascript' : 'text/' . $language;
      } else {
        $type = 'text/javascript';
      }

      ## Reduce "type/subtype; parameters" to a lowercased "type/subtype".
      ## A value that does not match the pattern is kept as is.
      if ($type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*([\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+)(?>(?>\x0D\x0A)?[\x09\x20])*(?>;|\z)]) {
        $type = "$1/$2";
        $type =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive
        ## TODO: Though we strip parameter here, it should not be ignored for the purpose of conformance checking...
      }
      $element_state->{script_type} = $type;
    }

    $element_state->{uri_info}->{src}->{type}->{resource} = 1;
    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;

    ## Inline script text is accumulated here by |check_child_text|.
    $element_state->{text} = '';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } else {
      if ($element_state->{must_be_empty}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:empty',
                           level => $self->{level}->{must});
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant and
        $element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed:empty',
                         level => $self->{level}->{must});
    }
    $element_state->{text} .= $child_node->data;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE(review): |$HTMLChecker{check_end}| is only invoked for
    ## inline scripts (no |src|); confirm that skipping it for external
    ## scripts is intended.
    unless ($element_state->{must_be_empty}) {
      if ($element_state->{script_type} =~ m![+/][Xx][Mm][Ll]\z!) {
        ## NOTE: XML content should be checked by THIS instance of checker
        ## as part of normal tree validation.
        $self->{onerror}->(node => $item->{node},
                           type => 'XML script lang',
                           text => $element_state->{script_type},
                           level => $self->{level}->{uncertain});
        ## ISSUE: Should we raise some kind of error for
        ## <script type="text/xml">aaaaa</script>?
        ## NOTE: ^^^ This is why we throw an "uncertain" error.
      } else {
        $self->{onsubdoc}->({s => $element_state->{text},
                             container_node => $item->{node},
                             media_type => $element_state->{script_type},
                             is_char_string => 1});
      }

      $HTMLChecker{check_end}->(@_);
    }
  },
  ## TODO: There MUST be |type| unless the script type is JavaScript. (resource error)
  ## NOTE: "When used to include script data, the script data must be embedded
  ## inline, the format of the data must be given using the type attribute,
  ## and the src attribute must not be specified." - not testable.
  ## TODO: It would be possible to err <script type=text/plain src=...>
}; # script
7344 ## ISSUE: Significant check and text child node
7345
## NOTE: When script is disabled.
##
## |noscript|: outside |head| the callbacks defer to the transparent
## checker; inside |head| (|$self->{flag}->{in_head}| is true) only a
## restricted set of children is accepted, as implemented by
## |check_child_element| and |check_child_text| below.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_REC,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;
    my $node = $item->{node};

    ## An error is raised if the owner document is not an HTML document.
    $self->{onerror}->(node => $node, type => 'in XML:noscript',
                       level => $self->{level}->{must})
        unless $node->owner_document->manakai_is_html;

    ## Outside |head|, |noscript| itself is registered as a minus element.
    $self->_add_minus_elements ($state, {$HTML_NS => {noscript => 1}})
        unless $self->{flag}->{in_head};

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;

    if (not $self->{flag}->{in_head}) {
      $HTMLTransparentChecker{check_child_element}->(@_);
    } else {
      ## Decide which error (if any) applies, then report it once at
      ## the end.
      my $error_type;
      if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
          $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
        $error_type = 'element not allowed:minus';
      } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
        ## Explicitly allowed.
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
        ## Allowed.
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
        ## Allowed unless it has the |scoped| attribute.
        $error_type = 'element not allowed:head noscript'
            if $child_el->has_attribute_ns (undef, 'scoped');
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
        ## Allowed only with an |http-equiv| attribute whose value is
        ## not "content-type".
        my $http_equiv_attr
            = $child_el->get_attribute_node_ns (undef, 'http-equiv');
        ## TODO: case
        $error_type = 'element not allowed:head noscript'
            if !$http_equiv_attr or
               lc ($http_equiv_attr->value) eq 'content-type';
      } else {
        $error_type = 'element not allowed:head noscript';
      }

      $self->{onerror}->(node => $child_el,
                         type => $error_type,
                         level => $self->{level}->{must})
          if defined $error_type;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $state) = @_;
    if (not $self->{flag}->{in_head}) {
      $HTMLTransparentChecker{check_child_text}->(@_);
    } elsif ($has_significant) {
      ## Significant text is not allowed in |head| |noscript|.
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed',
                         level => $self->{level}->{must});
    }
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->_remove_minus_elements ($state);

    ## Pick the base checker whose |check_end| finishes the element.
    my $base = $self->{flag}->{in_head}
        ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
    $base->{check_end}->(@_);
  },
}; # noscript
7438 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
7439
## |event-source|: built on the empty-element checker; only |src| has
## an element-specific checker.
$Element->{$HTML_NS}->{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->(
    {src => $HTMLURIAttrChecker},
    {%HTMLAttrStatus, src => FEATURE_HTML5_LC_DROPPED},
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## Set the |resource| type flag on the |src|, |template| and |ref|
    ## entries of |uri_info|.
    $state->{uri_info}{src}{type}{resource} = 1;
    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
}; # event-source
7457
## |eventsource|: built on the empty-element checker; only |src| has
## an element-specific checker.
$Element->{$HTML_NS}->{eventsource} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->(
    {src => $HTMLURIAttrChecker},
    {%HTMLAttrStatus, src => FEATURE_HTML5_DROPPED},
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## Set the |resource| type flag on the |src|, |template| and |ref|
    ## entries of |uri_info|.
    $state->{uri_info}{src}{type}{resource} = 1;
    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
}; # eventsource
7475
## |details|: starts from a copy of the |fieldset| definition and
## overrides its |status| and |check_attrs| entries.
$Element->{$HTML_NS}->{details} = {
  %{$Element->{$HTML_NS}->{fieldset}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {open => $GetHTMLBooleanAttrChecker->('open')},
    {%HTMLAttrStatus, open => FEATURE_HTML5_LC},
  ),
}; # details
7486
## |datagrid|: uses the flow content checker as its base; |disabled|
## and |multiple| are boolean attributes.
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      multiple => $GetHTMLBooleanAttrChecker->('multiple'),
    },
    {
      %HTMLAttrStatus,
      disabled => FEATURE_HTML5_DROPPED,
      multiple => FEATURE_HTML5_DROPPED,
    },
  ), # check_attrs
}; # datagrid
7499
## |command|: built on the empty-element checker.  |check_attrs2|
## enforces which attributes may accompany each |type| value.
$Element->{$HTML_NS}->{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      checked => $GetHTMLBooleanAttrChecker->('checked'),
      default => $GetHTMLBooleanAttrChecker->('default'),
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      icon => $HTMLURIAttrChecker,
      label => sub { }, ## NOTE: No requirement
      radiogroup => sub { }, ## NOTE: No requirement for the value
      type => $GetHTMLEnumeratedAttrChecker->({
        command => 1, checkbox => 1, radio => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      checked => FEATURE_HTML5_WD,
      default => FEATURE_HTML5_DROPPED, # HTML5 revision 3067
      disabled => FEATURE_HTML5_WD,
      icon => FEATURE_HTML5_WD,
      label => FEATURE_HTML5_WD,
      radiogroup => FEATURE_HTML5_WD,
      type => FEATURE_HTML5_WD,
    },
  ), # check_attrs
  check_attrs2 => sub {
    my ($self, $item, $state) = @_;
    my $node = $item->{node};

    ## Effective type: |radio| or |checkbox| if so specified (ASCII
    ## case-insensitive), |command| in any other case (including a
    ## missing attribute).
    my $type = $node->get_attribute_ns (undef, 'type') || '';
    $type =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
    $type = 'command' unless $type eq 'radio' or $type eq 'checkbox';

    ## [attribute name, allowed for the effective type?, error type],
    ## examined in this order.
    my @constraint = (
      ['radiogroup', $type eq 'radio',
       'attribute not allowed:radiogroup'],
      ['checked', ($type eq 'checkbox' || $type eq 'radio'),
       'attribute not allowed:checked'],
      ## HTML5 revision 2415
      ['default', $type eq 'command',
       'attribute not allowed:default'],
    );

    for my $constraint (@constraint) {
      my ($attr_name, $is_allowed, $error_type) = @$constraint;
      next if $is_allowed;
      my $attr = $node->get_attribute_node_ns (undef, $attr_name) or next;
      $self->{onerror}->(node => $attr,
                         type => $error_type,
                         level => $self->{level}->{must});
    }
  }, # check_attrs2
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## |icon| gets the |embedded| type flag; |template| and |ref| get
    ## the |resource| type flag.
    $state->{uri_info}{icon}{type}{embedded} = 1;
    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  }, # check_start
}; # command
7566
## |bb|: uses the phrasing content checker as its base.  While the
## element is open, the members of |$HTMLInteractiveContent| are
## registered as minus elements.
$Element->{$HTML_NS}->{bb} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->(
    {type => $GetHTMLEnumeratedAttrChecker->({makeapp => 1})},
    {%HTMLAttrStatus, type => FEATURE_HTML5_DROPPED},
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;
    $self->_add_minus_elements ($state, $HTMLInteractiveContent);

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;

    ## Undo the registration made in |check_start|, then let the
    ## transparent checker finish the element.
    $self->_remove_minus_elements ($state);

    $HTMLTransparentChecker{check_end}->(@_);
  },
}; # bb
7590
## |menu|
##
## The content model is either a sequence of |li| elements or phrasing
## content, but not a mixture.  |$element_state->{phase}| tracks which
## alternative has been chosen:
##   'li or phrasing' - nothing significant seen yet (initial state);
##   'li'             - an |li| child has been seen;
##   'phrasing'       - phrasing content (element or significant text)
##                      has been seen.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  #status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  status => FEATURE_M12N10_REC | FEATURE_HTML5_WD,
  ## NOTE: We don't want any |menu| element warned as deprecated.
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    compact => $GetHTMLBooleanAttrChecker->('compact'),
    ## ISSUE: <menu id=""><p contextmenu=""> match?  (In the current
    ## implementation, it does not match.)
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_HTML2X_RFC,
    autosubmit => FEATURE_HTML5_DROPPED,
    ## The key must be |compact| so that it matches the attribute
    ## checker entry of the same name above.
    compact => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_REC,
    sdaform => FEATURE_HTML20_RFC,
    sdapref => FEATURE_HTML20_RFC,
    type => FEATURE_HTML5_WD,
  }), # check_attrs
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';

    $element_state->{uri_info}->{template}->{type}->{resource} = 1;
    $element_state->{uri_info}->{ref}->{type}->{resource} = 1;
    $element_state->{id_type} = 'menu';
  }, # check_start
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{level}->{must});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      ## |li| is allowed unless phrasing content has already been seen.
      if ($element_state->{phase} eq 'li') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'li';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      ## Phrasing content is allowed unless an |li| has already been seen.
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed',
                           level => $self->{level}->{must});
      }
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed',
                         level => $self->{level}->{must});
    }
  }, # check_child_element
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text counts as phrasing content; other text is ignored.
    if ($has_significant) {
      if ($element_state->{phase} eq 'phrasing') {
        #
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } else {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed',
                           level => $self->{level}->{must});
      }
    }
  }, # check_child_text
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{phase} eq 'li') {
      $HTMLChecker{check_end}->(@_);
    } else { # 'phrasing' or 'li or phrasing'
      $HTMLPhrasingContentChecker{check_end}->(@_);
    }
  }, # check_end
}; # menu
7679
## |datatemplate|: apart from plus elements, only |rule| children in
## the HTML namespace are accepted, and significant text is an error.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DROPPED,
  is_xml_root => 1,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $state) = @_;

    ## Decide which error (if any) applies, then report it once.
    my $error_type;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln} and
        $IsInHTMLInteractiveContent->($child_el, $child_nsuri, $child_ln)) {
      $error_type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Explicitly allowed.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'rule') {
      ## Allowed.
    } else {
      $error_type = 'element not allowed:datatemplate';
    }

    $self->{onerror}->(node => $child_el,
                       type => $error_type,
                       level => $self->{level}->{must})
        if defined $error_type;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed',
                       level => $self->{level}->{must})
        if $has_significant;
  },
}; # datatemplate
7710
## |rule|: children and text are not checked at all.  While the
## element is open, |nest| is registered as a plus element and
## |$self->{flag}->{in_rule}| is set.
$Element->{$HTML_NS}->{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      condition => $HTMLSelectorsAttrChecker,
      mode => $GetHTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker->(),
    },
    {
      %HTMLAttrStatus,
      condition => FEATURE_HTML5_DROPPED,
      mode => FEATURE_HTML5_DROPPED,
    },
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;

    $self->_add_plus_elements ($state, {$HTML_NS => {nest => 1}});

    ## Remember the previous value of the flag so that |check_end|
    ## knows whether to clear it.
    $state->{in_rule_original} = $self->{flag}{in_rule};
    $self->{flag}{in_rule} = 1;

    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $state) = @_;

    $self->_remove_plus_elements ($state);

    ## Clear the flag only if it was not already set when this
    ## element started.
    if (not $state->{in_rule_original}) {
      delete $self->{flag}{in_rule};
    }

    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
}; # rule
7746
## |nest|: built on the empty-element checker.
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DROPPED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      filter => $HTMLSelectorsAttrChecker,
      mode => sub {
        my ($self, $attr) = @_;

        ## The value must be non-empty and must not contain any of
        ## the characters U+0009, U+000A, U+000C, U+000D, U+0020.
        unless ($attr->value =~ /\A[^\x09\x0A\x0C\x0D\x20]+\z/) {
          $self->{onerror}->(node => $attr, type => 'mode:syntax error',
                             level => $self->{level}->{must});
        }
      },
    },
    {
      %HTMLAttrStatus,
      filter => FEATURE_HTML5_DROPPED,
      mode => FEATURE_HTML5_DROPPED,
    },
  ),
}; # nest
7766
## |legend|: child elements and text are checked by the flow content
## checker when the parent state has |in_figure| set, and by the
## phrasing content checker otherwise.  While the element is open,
## |figure| is registered as a minus element.
$Element->{$HTML_NS}->{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      # XXX
      # align => $GetHTMLEnumeratedAttrChecker->({
      #   top => 1, bottom => 1, left => 1, right => 1,
      # }),
      form => $HTMLFormAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_HTML5_FD | FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC_DEPRECATED,
      form => FEATURE_HTML5_DROPPED,
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;
    $self->_add_minus_elements ($state, {$HTML_NS => {figure => 1}});

    $HTMLFlowContentChecker{check_start}->(@_);
  },
  check_child_element => sub {
    my $item = $_[1];

    ## XXX This does not work for |<legend><ins><blockquote>|
    my $delegate = $item->{parent_state}->{in_figure}
        ? $HTMLFlowContentChecker{check_child_element}
        : $HTMLPhrasingContentChecker{check_child_element};
    $delegate->(@_);
  },
  check_child_text => sub {
    my $item = $_[1];

    my $delegate = $item->{parent_state}->{in_figure}
        ? $HTMLFlowContentChecker{check_child_text}
        : $HTMLPhrasingContentChecker{check_child_text};
    $delegate->(@_);
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->_remove_minus_elements ($state);

    $HTMLFlowContentChecker{check_end}->(@_);
  },
}; # legend
7815
## |div|: uses the flow content checker as its base.
$Element->{$HTML_NS}->{div} = {
  %HTMLFlowContentChecker,
  status => FEATURE_HTML5_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      align => $GetHTMLEnumeratedAttrChecker->({
        left => 1, center => 1, right => 1, justify => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NXHTML2CommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      lang => FEATURE_HTML5_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;

    ## Set the |resource| type flag on the |datasrc|, |template| and
    ## |ref| entries of |uri_info|.
    $state->{uri_info}{datasrc}{type}{resource} = 1;
    $state->{uri_info}{template}{type}{resource} = 1;
    $state->{uri_info}{ref}{type}{resource} = 1;
  },
}; # div
7840
## |center|: uses the flow content checker as its base; no
## element-specific attribute checkers.
$Element->{$HTML_NS}->{center} = {
  %HTMLFlowContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_REC,
    },
  ),
}; # center
7850
## |font|: uses the transparent checker as its base; no
## element-specific attribute checkers are defined yet.
$Element->{$HTML_NS}->{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: HTML4 |size|, |color|, |face|
    },
    {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_LC | FEATURE_M12N10_REC,
      color => FEATURE_M12N10_REC_DEPRECATED,
      dir => FEATURE_HTML5_REC,
      face => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_REC,
      lang => FEATURE_HTML5_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
      style => FEATURE_HTML5_REC,
      title => FEATURE_HTML5_REC,
    },
  ),
  ## NOTE: When the |font| element was defined in the HTML5 specification,
  ## it was allowed only in a document with the WYSIWYG signature.  The
  ## checker does not check whether there is the signature, since the
  ## signature is dropped, too, and has never been implemented.  (In addition,
  ## for any |font| element an "element not defined" error is raised anyway,
  ## such that we don't have to raise an additional error.)
}; # font
7875
## |basefont|: built on the empty-element checker; no element-specific
## attribute checkers are defined yet.
$Element->{$HTML_NS}->{basefont} = {
  %HTMLEmptyChecker,
  status => FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO: color, face, size
    },
    {
      %HTMLAttrStatus,
      color => FEATURE_M12N10_REC_DEPRECATED,
      face => FEATURE_M12N10_REC_DEPRECATED,
      id => FEATURE_HTML5_REC,
      size => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
}; # basefont
7889
7890 ## TODO: frameset FEATURE_M12N10_REC
7891 ## class title id cols rows style(x10)
7892
## |frameset|: provisionally built on the empty-element checker (see
## XXX).  Every event handler attribute listed below is validated by
## |$HTMLEventHandlerAttrChecker|.
$Element->{$HTML_NS}->{frameset} = {
  %HTMLEmptyChecker, # XXX
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## XXX
      (map { $_ => $HTMLEventHandlerAttrChecker } qw(
        onafterprint onbeforeprint onbeforeunload onblur onerror
        onfocus onhashchange onload onmessage onoffline ononline
        onpopstate onredo onresize onstorage onundo onunload
      )),
    },
    {
      %HTMLAttrStatus,
      ## XXX
      onload => FEATURE_M12N10_REC,
      onunload => FEATURE_M12N10_REC,
    },
  ),
}; # frameset
7922
7923 ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
7924 ## noframes Common, lang(xhtml10)
7925
7926 ## TODO: CR: rbc rtc @rbspan (M12NXHTML2Common)
7927
7928 ## TODO: xmp, listing, plaintext FEATURE_HTML32_REC_OBSOLETE
7929 ## TODO: ^^^ lang, dir, id, class [HTML 2.x] sdaform [HTML 2.0]
7930 ## xmp, listing sdapref[HTML2,0]
7931
7932 =pod
7933
7934 HTML 2.0 nextid @n
7935
7936 RFC 2659: CERTS CRYPTOPTS
7937
7938 ISO-HTML: pre-html, divN
7939
7940 XHTML2: blockcode (Common), h (Common), separator (Common), l (Common),
7941 di (Common), nl (Common), handler (Common, type), standby (Common),
7942 summary (Common)
7943
7944 Access & XHTML2: access (LC)
7945
7946 XML Events & XForms (for XHTML2 support; very, very low priority)
7947
7948 # XXX marquee onbounce/onfinish/onstart
7949
7950 =cut
7951
7952 ## NOTE: Where RFC 2659 allows additional attributes is unclear.
7953 ## We added them only to |a|. |link| and |form| might also allow them
7954 ## in theory.
7955
## Mark the HTML namespace entry of the checker's namespace table as
## loaded.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

1; ## The module file must end with a true value.

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24