/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.50 - (hide annotations) (download)
Sun Feb 24 09:47:13 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.49: +299 -181 lines
++ whatpm/Whatpm/ContentChecker/ChangeLog	24 Feb 2008 09:47:09 -0000
	* HTML.pm: HTML5 attribute status added.

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
## Feature status constants.  Each one is a constant sub (empty prototype)
## that maps a specification/maturity label used in this file onto one of
## the generic Whatpm::ContentChecker::FEATURE_STATUS_* values.  The
## right-hand side is resolved when the constant is called.
7 wakaba 1.48 sub FEATURE_HTML5_LC () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
## The AT_RISK, WD, FD, DEFAULT and DROPPED labels below all resolve to
## the same WD status value.
8     sub FEATURE_HTML5_AT_RISK () { Whatpm::ContentChecker::FEATURE_STATUS_WD }
9     sub FEATURE_HTML5_WD () { Whatpm::ContentChecker::FEATURE_STATUS_WD }
10     sub FEATURE_HTML5_FD () { Whatpm::ContentChecker::FEATURE_STATUS_WD }
11     sub FEATURE_HTML5_DEFAULT () { Whatpm::ContentChecker::FEATURE_STATUS_WD }
12 wakaba 1.49 sub FEATURE_HTML5_DROPPED () { Whatpm::ContentChecker::FEATURE_STATUS_WD }
13     ## NOTE: Was part of HTML5, but was dropped.
14 wakaba 1.48 sub FEATURE_WF2 () { Whatpm::ContentChecker::FEATURE_STATUS_LC }
15 wakaba 1.49 sub FEATURE_M12N10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_REC }
16     ## NOTE: Oh, XHTML m12n 1.0 passed the CR phase! W3C Process suck!
17     sub FEATURE_M12N10_REC_DEPRECATED () {
18     Whatpm::ContentChecker::FEATURE_STATUS_REC
19     }
## NOTE(review): XHTML10_REC maps to the CR status (not REC), unlike the
## M12N10 constants above -- confirm that this is intentional.
20     sub FEATURE_XHTML10_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR }
## NOTE(review): HTML4_REC_RESERVED maps to the WD status -- confirm.
21     sub FEATURE_HTML4_REC_RESERVED () {
22     Whatpm::ContentChecker::FEATURE_STATUS_WD
23     }
24    
25     ## NOTE: M12N10 status is based on its abstract module definition.
26     ## It contains a number of problems. (However, again, it's a REC!)
27    
28     ## NOTE: XHTML10 status is based on its transitional and frameset DTDs
29     ## (second edition). Only missing attributes from M12N10 abstract
30     ## definition are added.
31     ## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
32     ## 4.01). Only missing attributes from XHTML10 are added.
33 wakaba 1.48
34 wakaba 1.29 ## December 2007 HTML5 Classification
35    
## Metadata content: element local names, grouped by namespace URI, that
## are classified as metadata content.  Each listed name maps to 1.
my $HTMLMetadataContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(title base link style script noscript),
      qw(event-source command datatemplate),
      ## NOTE: A |meta| with no |name| element is not allowed as
      ## a metadata content other than |head| element.
      'meta',
    ),
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
48    
## Prose content: element local names, grouped by namespace URI, that are
## accepted where prose content is expected.  The second half of the HTML
## list repeats the phrasing content elements.
my $HTMLProseContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(section nav article blockquote aside),
      qw(h1 h2 h3 h4 h5 h6 header),
      qw(footer address p hr dialog pre),
      qw(ol ul dl figure map table),
      'details',      ## ISSUE: "Prose element" in spec.
      'datagrid',     ## ISSUE: "Prose element" in spec.
      'datatemplate',
      'div',          ## ISSUE: No category in spec.
      ## NOTE: |style| is only allowed if |scoped| attribute is specified.
      ## Additionally, it must be before any other element or
      ## non-inter-element-whitespace text node.
      'style',

      qw(br q cite em strong small mark),
      qw(dfn abbr time progress meter code),
      qw(var samp kbd sub sup span i),
      qw(b bdo script noscript event-source),
      qw(command font),
      'a',
      ## ISSUE: |datagrid| (already listed above) is also an
      ## "Interactive element" in the spec.
      ## NOTE: |area| is allowed only as a descendant of |map|.
      'area',

      qw(ins del),

      ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
      'menu',

      qw(img iframe embed object video audio),
      'canvas',
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
87    
## Sectioning content: element local names, grouped by namespace URI.
my $HTMLSectioningContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(section nav article blockquote aside),
      ## NOTE: |body| is only allowed in |html| element.
      'body',
    ),
  },
};
95    
## Heading content: element local names, grouped by namespace URI.
my $HTMLHeadingContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw(h1 h2 h3 h4 h5 h6 header)
  },
};
101    
## Phrasing content: element local names, grouped by namespace URI.
## NOTE: All phrasing content is also prose content.
## NOTE: Non-inter-element-whitespace text nodes are phrasing content too,
## but they are not represented in this table.
my $HTMLPhrasingContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(br q cite em strong small mark),
      qw(dfn abbr time progress meter code),
      qw(var samp kbd sub sup span i),
      qw(b bdo script noscript event-source),
      qw(command font),
      'a',
      'datagrid',     ## ISSUE: "Interactive element" in the spec.
      ## NOTE: |area| is allowed only as a descendant of |map|.
      'area',

      ## NOTE: Transparent.
      qw(ins del),

      ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
      'menu',

      qw(img iframe embed object video audio),
      'canvas',
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
131    
132 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
133 wakaba 1.29
## Interactive content: element local names, grouped by namespace URI.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      'a',
      'datagrid',     ## ISSUE: Categorized as "Interactive element"
    ),
  },
};
140    
141 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
142     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
143    
144     ## -- Common attribute syntax checkers
145    
146 wakaba 1.1 our $AttrChecker;
147    
## Builds a checker for an enumerated attribute.
##
## Argument: a hash reference {value => state}, where a positive state
## marks a conforming keyword and any other true state marks a keyword
## that is known but non-conforming.
## Returns: a checker sub taking ($self, $attr).  The attribute value is
## lowercased before lookup; nothing is reported for conforming keywords.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    ## TODO: ASCII case insensitivity?
    my $state = $states->{lc ($attr->value)};
    return if $state > 0;

    my $type = $state ? 'enumerated:non-conforming' : 'enumerated:invalid';
    $self->{onerror}->(node => $attr, type => $type);
  };
}; # $GetHTMLEnumeratedAttrChecker
162    
## Builds a checker for a boolean attribute.
##
## Argument: the local name of the attribute.
## Returns: a checker sub taking ($self, $attr) that reports
## 'boolean:invalid' unless the value is the empty string or exactly the
## attribute's local name (case-sensitive comparison).
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;
    my $given = $attr->value;
    return if $given eq '' or $given eq $local_name;
    $self->{onerror}->(node => $attr, type => 'boolean:invalid');
  };
}; # $GetHTMLBooleanAttrChecker
173    
## Unordered set of unique space-separated tokens.
##
## Splits the attribute value on the space characters U+0009..U+000D and
## U+0020, ignores empty pieces, and reports 'duplicate token:<token>'
## for every occurrence of a token after its first one.
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($self, $attr) = @_;
  my %seen;
  my @tokens = grep { length $_ } split /[\x09-\x0D\x20]/, $attr->value;
  foreach my $token (@tokens) {
    next unless $seen{$token}++;
    $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
  }
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
186 wakaba 1.8
## |rel| attribute (unordered set of space separated tokens,
## whose allowed values are defined by the section on link types).
##
## Arguments: ($a_or_area, $todo, $self, $attr).  $a_or_area is used as an
## index into each link type's |effect| array; $todo receives the
## |has_hyperlink_link_type| flag.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr) = @_;

  ## Collect the distinct tokens.  A repeated token is an error, except
  ## for |up|, which may be repeated.
  my %word;
  foreach my $token (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
    }
  }

  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.
  require Whatpm::_LinkTypeList;
  our $LinkType;
  foreach my $token (keys %word) {
    my $def = $LinkType->{$token};
    unless (defined $def) {
      $self->{onerror}->(node => $attr, level => 'unsupported',
                         type => 'link type:'.$token);
      next;
    }

    my $status = $def->{status};
    my $effect = $def->{effect}->[$a_or_area];
    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr, level => 's',
                           type => 'link type:proposed:'.$token);
      }
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context:'.$token);
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming:'.$token);
    }

    ## |alternate| is decided after the loop, since it depends on
    ## whether |stylesheet| is also present.
    if (defined $effect and $token ne 'alternate' and
        $effect eq 'hyperlink') {
      $todo->{has_hyperlink_link_type} = 1;
    }

    next unless $def->{unique};
    if ($self->{has_link_type}->{$token}) {
      $self->{onerror}->(node => $attr,
                         type => 'link type:duplicate:'.$token);
    } else {
      $self->{has_link_type}->{$token} = 1;
    }
  }
  $todo->{has_hyperlink_link_type} = 1
      if $word{alternate} and not $word{stylesheet};

  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback
  ## link, which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than
  ## predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".
}; # $HTMLLinkTypesAttrChecker
270 wakaba 1.20
271     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
272 wakaba 1.1
## URI (or IRI)
##
## Hands the attribute value to Whatpm::URIChecker->check_iri_reference
## and relays every problem it reports as 'URI::<type>[:<position>]'.
## Also records on $self that a URI attribute has been seen.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $relay = sub {
    my %opt = @_;
    my $suffix = defined $opt{position} ? ':'.$opt{position} : '';
    $self->{onerror}->(node => $attr, level => $opt{level},
                       type => 'URI::'.$opt{type}.$suffix);
  };
  Whatpm::URIChecker->check_iri_reference ($attr->value, $relay);
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>
}; # $HTMLURIAttrChecker
286    
## A space separated list of one or more URIs (or IRIs)
##
## Each piece of the value is handed to
## Whatpm::URIChecker->check_iri_reference; problems are relayed as
## 'URIs::<type>:<index>[:<position>]', where <index> is the zero-based
## position of the piece in the split result.  Also records on $self that
## a URI attribute has been seen.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;
  my $index = 0;
  my $relay = sub {
    my %opt = @_;
    my $suffix = defined $opt{position} ? ':'.$opt{position} : '';
    $self->{onerror}->(node => $attr, level => $opt{level},
                       type => 'URIs:'.':'.$opt{type}.':'.$index.$suffix);
  };
  foreach my $reference (split /[\x09-\x0D\x20]+/, $attr->value) {
    Whatpm::URIChecker->check_iri_reference ($reference, $relay);
    $index++;
  }
  ## ISSUE: Relative references?
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
308    
## Checks a date-and-time attribute value of the form
## YYYY-MM-DD, a separator ("T" and/or space characters), HH:MM with
## optional :SS and optional fraction, then "Z" or a +HH:MM / -HH:MM
## time zone offset.  Reports 'datetime:syntax error' when the overall
## shape does not match, and otherwise one error per out-of-range field.
my $HTMLDatetimeAttrChecker = sub {
  my ($self, $attr) = @_;
  my $complain = sub {
    $self->{onerror}->(node => $attr, type => $_[0]);
  };

  ## ISSUE: "space", not "space character" (in parsing algorithm, "space character")
  unless ($attr->value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
    $complain->('datetime:syntax error');
    return;
  }
  my ($year, $month, $day, $hour, $minute, $second, $fraction,
      $tz_hour, $tz_minute) = ($1, $2, $3, $4, $5, $6, $7, $8, $9);

  if ($month < 1 or $month > 12) {
    $complain->('datetime:bad month');
  } else { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
    ## Index 0 is a placeholder so that months can be used as indices;
    ## February is listed with 29 days and leap years are checked below.
    my @last_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
    my $is_leap_year
        = ($year % 400 == 0 or ($year % 4 == 0 and $year % 100 != 0));
    if ($day < 1 or $day > $last_day[$month]) {
      $complain->('datetime:bad day');
    }
    if ($month == 2 and $day == 29 and not $is_leap_year) {
      $complain->('datetime:bad day');
    }
  }

  $complain->('datetime:bad hour') if $hour > 23;
  $complain->('datetime:bad minute') if $minute > 59;
  $complain->('datetime:bad second') if defined $second and $second > 59;
  ## The time zone fields are undefined when the value ends with "Z".
  $complain->('datetime:bad timezone hour') if $tz_hour > 23;
  $complain->('datetime:bad timezone minute') if $tz_minute > 59;
  ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
}; # $HTMLDatetimeAttrChecker
339    
## Checks that the attribute value is an integer: an optional "-"
## followed by one or more ASCII digits.  Anything else is reported as
## 'integer:syntax error'.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A-?[0-9]+\z/;
  $self->{onerror}->(node => $attr, type => 'integer:syntax error');
}; # $HTMLIntegerAttrChecker
347    
## Builds a checker for a non-negative integer attribute.
##
## Argument: a code reference that receives the numeric value and returns
## true when it is within the permitted range.
## Returns: a checker sub taking ($self, $attr).  It reports
## 'nninteger:syntax error' unless the value consists of ASCII digits
## only, and 'nninteger:out of range' when the range check fails.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $text = $attr->value;
    unless ($text =~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'nninteger:syntax error');
      return;
    }
    return if $range_check->($text + 0);
    $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
363    
## Builds a checker for a floating point number attribute.
##
## Argument: a code reference that receives the numeric value and returns
## true when it is within the permitted range.
## Returns: a checker sub taking ($self, $attr).  It reports
## 'float:syntax error' unless the value is an optional "-", followed by
## ASCII digits with at most one "." among them and at least one digit
## overall, and 'float:out of range' when the range check fails.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## At most one decimal point is accepted.  (A character class such
    ## as [0-9.]+ would also let values like "1.2.3" or "1..2" through.)
    if ($value =~ /\A-?[0-9]*\.?[0-9]*\z/ and $value =~ /[0-9]/) {
      unless ($range_check->($value + 0)) {
        $self->{onerror}->(node => $attr, type => 'float:out of range');
      }
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error');
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
379    
## "A valid MIME type, optionally with parameters. [RFC 2046]"
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
##
## Parses the attribute value as type "/" subtype followed by zero or more
## ";" name "=" value parameters, then passes the flat list
## (type, subtype, name1, value1, ...) to Whatpm::IMTChecker->check_imt.
## Problems found by that checker are relayed as 'IMT:<type>'; a value
## that cannot be parsed at all is reported as 'IMT:syntax error'.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
  ## ISSUE: RFC 2231 extension? Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
    my @type = ($1, $2);
    my $param = $3;
    ## Consume one parameter per iteration.  $2 is set for a token
    ## value, $3 for a quoted-string value.
    while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
      if (defined $2) {
        push @type, $1 => $2;
      } else {
        ## Copy the captures first: the substitution below resets them.
        my $n = $1;
        ## Drop the surrounding double quotes, then undo the
        ## backslash escapes (quoted-pair).
        my $v = substr $3, 1, length ($3) - 2;
        $v =~ s/\\(.)/$1/gs;
        push @type, $n => $v;
      }
    }
    require Whatpm::IMTChecker;
    Whatpm::IMTChecker->check_imt (sub {
      my %opt = @_;
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => 'IMT:'.$opt{type});
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
  }
}; # $HTMLIMTAttrChecker
416    
## Checks a language tag attribute value with
## Whatpm::LangTag->check_rfc3066_language_tag and relays each problem as
## 'LangTag:<type>[:<subtag>]', passing the reported value and level on.
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  my $relay = sub {
    my %opt = @_;
    my @parts = ('LangTag', $opt{type});
    push @parts, $opt{subtag} if defined $opt{subtag};
    $self->{onerror}->(node => $attr, type => join (':', @parts),
                       value => $opt{value}, level => $opt{level});
  };
  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag ($attr->value, $relay);
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
434    
## "A valid media query [MQ]"
##
## Media queries are not validated: every use of the attribute is
## reported with the 'unsupported' level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr, level => 'unsupported',
                type => 'media query');
  $self->{onerror}->(@report);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
442    
## Event handler content attributes are not validated: every use is
## reported with the 'unsupported' level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr, level => 'unsupported',
                type => 'event handler');
  $self->{onerror}->(@report);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
453    
## Checks a |usemap| value, which MUST be a valid hashed ID reference to
## a |map| element.  A value starting with "#" is queued on
## $self->{usemap} as [name-without-hash, $attr]; anything else is
## reported as '#idref:syntax error'.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  if ($attr->value =~ /\A#(.*)\z/s) {
    ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
    my $name = $1;
    push @{$self->{usemap}}, [$name => $attr];
  } else {
    $self->{onerror}->(node => $attr, type => '#idref:syntax error');
  }
  ## NOTE: Space characters in hashed ID references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
}; # $HTMLUsemapAttrChecker
467    
## Checks a browsing context name (|target|).  Names that start with "_"
## are accepted only when they are, compared after lowercasing, one of
## |_self|, |_parent| and |_top|; other such names are reported as
## 'reserved browsing context name'.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $name = $attr->value;
  ## NOTE: An empty string is a valid browsing context name (same as _self).
  return unless $name =~ /^_/;

  ## ISSUE: ASCII case-insensitive?
  my %keyword = map { ($_ => 1) } qw(_self _parent _top);
  return if $keyword{lc ($name)};
  $self->{onerror}->(node => $attr,
                     type => 'reserved browsing context name');
}; # $HTMLTargetAttrChecker
483    
## Checks an attribute whose value is a group of selectors by running it
## through Whatpm::CSS::SelectorsParser.  The parser is told which
## pseudo-classes and pseudo-elements are known, inherits the checker's
## |must_level|, and has its errors relayed with a 'selectors:' prefix on
## the error type and the attribute as the node.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  @{$parser->{pseudo_class}}{@pseudo_classes} = (1) x @pseudo_classes;

  my @pseudo_elements = qw/
    after before first-letter first-line
  /;
  @{$parser->{pseudo_element}}{@pseudo_elements} = (1) x @pseudo_elements;

  $parser->{must_level} = $self->{must_level};
  $parser->{onerror} = sub {
    my %report = @_;
    $report{type} = 'selectors:'.$report{type};
    $self->{onerror}->(%report, node => $attr);
  };
  $parser->parse_string ($attr->value);
}; # $HTMLSelectorsAttrChecker
513    
## Checkers for the attributes that are not specific to one element,
## keyed by attribute local name.  Each checker is called with
## ($self, $attr).
my $HTMLAttrChecker = {
  ## |id|: must be non-empty, unique in the document and free of space
  ## characters.  All attributes seen for an ID are collected in
  ## $self->{id}->{<value>}.
  id => sub {
    ## NOTE: |map| has its own variant of |id=""| checker
    my ($self, $attr) = @_;
    my $id = $attr->value;
    unless (length $id) {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value');
      return;
    }
    if (my $holders = $self->{id}->{$id}) {
      $self->{onerror}->(node => $attr, type => 'duplicate ID');
      push @$holders, $attr;
    } else {
      $self->{id}->{$id} = [$attr];
    }
    $self->{onerror}->(node => $attr, type => 'space in ID')
        if $id =~ /[\x09-\x0D\x20]/;
  },
  title => sub {}, ## NOTE: No conformance criteria
  ## |lang|: the empty string or a language tag; not allowed in XML
  ## documents.
  lang => sub {
    my ($self, $attr) = @_;
    my $tag = $attr->value;
    if ($tag ne '') {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($tag, sub {
        my %opt = @_;
        my @parts = ('LangTag', $opt{type});
        push @parts, $opt{subtag} if defined $opt{subtag};
        $self->{onerror}->(node => $attr, type => join (':', @parts),
                           value => $opt{value}, level => $opt{level});
      });
    }
    ## ISSUE: RFC 4646 (3066bis)?
    $self->{onerror}->(node => $attr, type => 'in XML:lang')
        unless $attr->owner_document->manakai_is_html;

    ## TODO: test data
  },
  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
  ## |class|: set of unique space-separated tokens.  The first occurrence
  ## of each token is recorded in $self->{return}->{class}.
  class => sub {
    my ($self, $attr) = @_;
    my %seen;
    foreach my $token (grep {length $_}
                       split /[\x09-\x0D\x20]/, $attr->value) {
      if ($seen{$token}++) {
        $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
      } else {
        push @{$self->{return}->{class}->{$token}||=[]}, $attr;
      }
    }
  },
  ## |contextmenu|: only queued on $self->{contextmenu} here.
  contextmenu => sub {
    my ($self, $attr) = @_;
    push @{$self->{contextmenu}}, [$attr->value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'), ## TODO: status: Working Draft
  tabindex => $HTMLIntegerAttrChecker
  ## TODO: ref, template, registrationmark
};
583    
## Feature status of the attributes that are not specific to one element,
## keyed by attribute local name and grouped here by status value.
my %HTMLAttrStatus = (
  (map { ($_ => FEATURE_HTML5_DEFAULT) } qw(
    class contenteditable dir id lang tabindex title
  )),
  (map { ($_ => FEATURE_HTML5_WD) } qw(contextmenu irrelevant)),
  (map { ($_ => FEATURE_HTML5_LC) } qw(draggable)),
  (map { ($_ => FEATURE_HTML5_AT_RISK) } qw(ref registrationmark template)),
);
599    
## Feature status of the attributes listed below, keyed by attribute
## local name.  Every entry carries both the HTML5 default status and the
## M12N10 REC status.
my %HTMLM12NCommonAttrStatus = (
  map { ($_ => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC) } qw(
    class dir id
    onclick ondblclick
    onmousedown onmouseup onmouseover onmousemove onmouseout
    onkeypress onkeydown onkeyup
    style title
  )
);
617    
## Register every event handler content attribute: each one shares the
## event handler checker and gets the HTML5 default status.
foreach my $event_attr (qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onsubmit onunload
/) {
  $HTMLAttrChecker->{$event_attr} = $HTMLEventHandlerAttrChecker;
  $HTMLAttrStatus{$event_attr} = FEATURE_HTML5_DEFAULT;
}
629    
## Builds the |check_attrs| handler of an element.
##
## Arguments: a hash reference of element-specific attribute checkers and
## a hash reference of element-specific attribute statuses, both keyed by
## attribute local name.
## Returns: a sub taking ($self, $item, $element_state) that checks every
## attribute of $item->{node}.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;
  return sub {
    my ($self, $item, $element_state) = @_;
    foreach my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;
      my $in_null_ns = ($ns eq '');

      ## Lookup order: element-specific checker, then the common HTML
      ## checker (both for null-namespace attributes only), then the
      ## namespace-wide tables, ending with the namespace's '' fallback.
      my $checker;
      if ($in_null_ns) {
        $checker = $element_specific_checker->{$ln}
            || $HTMLAttrChecker->{$ln};
      }
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};

      if ($checker) {
        $checker->($self, $attr, $item);
      } elsif ($in_null_ns) {
        $self->{onerror}->(node => $attr, level => $self->{must_level},
                           type => 'attribute not defined');
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      $self->_attr_status_info ($attr, $element_specific_status->{$ln})
          if $in_null_ns;
      ## TODO: global attribute
    }
  };
}; # $GetHTMLAttrsChecker
663    
## Base handler set for HTML elements: everything from
## %Whatpm::ContentChecker::AnyChecker, with |check_attrs| replaced by an
## attribute checker that has no element-specific checkers and uses
## %HTMLAttrStatus as its status table.
664 wakaba 1.40 my %HTMLChecker = (
665     %Whatpm::ContentChecker::AnyChecker,
666 wakaba 1.49 check_attrs => $GetHTMLAttrsChecker->({}, \%HTMLAttrStatus),
667 wakaba 1.40 );
668    
## Handler set for elements whose content model is empty: child elements
## (other than those currently in |plus_elements|) and significant text
## are errors.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};
    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:empty',
                       level => $self->{must_level});
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{must_level});
  },
);
695    
## Handler set for elements whose content model is text: child elements
## (other than those currently in |plus_elements|) are errors.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};
    ## NOTE(review): unlike the sibling checkers, no |level| is passed
    ## for this error -- confirm that this is intentional.
    $self->{onerror}->(node => $child_el, type => 'element not allowed');
  },
);
712    
## Handler set for elements whose content model is prose content.
##
## $element_state->{has_non_style} records that something other than a
## |style| child has been seen, because a |style| child is only allowed
## with a |scoped| attribute and before any such content.
my %HTMLProseContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    if ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      my $misplaced = ($element_state->{has_non_style} or
                       not $child_el->has_attribute_ns (undef, 'scoped'));
      if ($misplaced) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose style',
                           level => $self->{must_level});
      }
      return;
    }

    if ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
      return;
    }

    $element_state->{has_non_style} = 1;
    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:prose',
                       level => $self->{must_level});
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  ## Propagates |has_significant| to the real parent's state, or reports
  ## a non-transparent element that ended without significant content.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
      return;
    }
    return if $item->{transparent};
    $self->{onerror}->(node => $item->{node},
                       level => $self->{should_level},
                       type => 'no significant content');
  },
);
759    
## Checker for elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,

  ## Decide which error, if any, applies to the child.  The three tables
  ## are consulted in the order minus, plus, phrasing content.
  check_child_element => sub {
    my ($self, $item, $child, $ns, $ln, $transparent_child, $state) = @_;
    my $error_type
        = $self->{minus_elements}->{$ns}->{$ln} ? 'element not allowed:minus'
        : $self->{plus_elements}->{$ns}->{$ln}  ? undef
        : $HTMLPhrasingContent->{$ns}->{$ln}    ? undef
        :                                         'element not allowed:phrasing';
    if (defined $error_type) {
      $self->{onerror}->(node => $child,
                         type => $error_type,
                         level => $self->{must_level});
    }
  },

  ## The end-of-element rule is shared with the prose content checker.
  check_end => $HTMLProseContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between the prose and phrasing content checkers are
  ## |check_child_element| and |check_child_text|.
);
784    
## The transparent checker is a plain copy of the prose content checker.
## ISSUE: Should the significant content rule be applied to a transparent
## element that has a parent?
my %HTMLTransparentChecker = (%HTMLProseContentChecker);
788 wakaba 1.40
789 wakaba 1.1 our $Element;
790     our $ElementDefault;
791    
## Entry registered under the empty local name (presumably the fallback for
## HTML-namespace elements without a definition of their own -- confirm
## against the dispatcher).  Its |check_start| always reports the element
## as not defined; everything else comes from |%HTMLChecker|.
$Element->{$HTML_NS}->{''} = {
  %HTMLChecker,
  check_start => sub {
    my ($self, $item) = @_;
    $self->{onerror}->(node => $item->{node},
                       level => $self->{must_level},
                       type => 'element not defined');
  },
};
800    
## |html|: the root element.  Its children are tracked with a small state
## machine kept in |$element_state->{phase}|:
##   'before head' -> 'after head' -> 'after body'
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    ## |xmlns| must equal the HTML namespace URI and is reported when the
    ## owner document is not an HTML document.
    xmlns => sub {
      my ($self, $attr) = @_;
      if ($attr->value ne $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value');
      }
      if (not $attr->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
        ## TODO: Test
      }
    },
  }, {
    %HTMLAttrStatus,
    xmlns => FEATURE_HTML5_DEFAULT,
    manifest => FEATURE_HTML5_DEFAULT,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    version => FEATURE_M12N10_REC,
  }),

  check_start => sub {
    my (undef, undef, $element_state) = @_;
    $element_state->{phase} = 'before head';
  },

  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $not_allowed = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } else {
      my $in_html_ns = $child_nsuri eq $HTML_NS;
      my $phase = $element_state->{phase};
      if ($phase eq 'before head') {
        if ($in_html_ns and $child_ln eq 'head') {
          $element_state->{phase} = 'after head';
        } elsif ($in_html_ns and $child_ln eq 'body') {
          ## |body| without a preceding |head|.
          $self->{onerror}->(node => $child_el,
                             type => 'ps element missing:head');
          $element_state->{phase} = 'after body';
        } else {
          $not_allowed->();
        }
      } elsif ($phase eq 'after head') {
        if ($in_html_ns and $child_ln eq 'body') {
          $element_state->{phase} = 'after body';
        } else {
          $not_allowed->();
        }
      } elsif ($phase eq 'after body') {
        $not_allowed->();
      } else {
        die "check_child_element: Bad |html| phase: $element_state->{phase}";
      }
    }
  },

  ## Significant text is never allowed directly inside |html|.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant;
  },

  ## Report whichever of |head| / |body| has not been seen, then run the
  ## generic end-of-element check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    my %missing_for = (
      'after body' => [],
      'before head' => ['child element missing:head',
                        'child element missing:body'],
      'after head' => ['child element missing:body'],
    );
    my $missing = $missing_for{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";
    for my $type (@$missing) {
      $self->{onerror}->(node => $item->{node},
                         type => $type);
    }

    $HTMLChecker{check_end}->(@_);
  },
};
890 wakaba 1.25
## |head|: metadata content with at most one |title| (and at least one,
## checked in |check_end|).  While the children of |head| are processed
## |$self->{flag}->{in_head}| is set; |check_end| puts back the value the
## flag had before.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      ## Only the first |title| is accepted.
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{must_level});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## A |scoped| style is not allowed in |head|.
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{must_level});
    }

    ## Save the previous value of the |in_head| flag only once.  This hook
    ## runs for every child element; saving unconditionally would, from the
    ## second child on, record the value 1 set below and make |check_end|
    ## "restore" the flag to 1 instead of its value before this |head|.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:title');
    }
    ## Put back the value saved by |check_child_element| (undef when the
    ## element had no child element at all).
    $self->{flag}->{in_head} = $element_state->{in_head_original};

    $HTMLChecker{check_end}->(@_);
  },
};
955    
## |title|: text-only element (hooks from |%HTMLTextChecker|) with its own
## attribute status table.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
};
966 wakaba 1.1
## |base|: empty element.  Besides the ordinary attribute checks, its
## |check_attrs| hook enforces the document-wide constraints: a single
## |base|, placement relative to earlier URI attributes and hyperlinks,
## and presence of |href| or |target|.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $el = $item->{node};

    ## Only the first |base| seen by this checker instance is accepted.
    if (not $self->{has_base}) {
      $self->{has_base} = 1;
    } else {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:base');
    }

    my $with_href = $el->has_attribute_ns (undef, 'href');
    my $with_target = $el->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $with_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely,
      ## though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $self->{onerror}->(node => $el,
                         type => 'basehref after URI attribute');
    }
    if ($self->{has_hyperlink_element} and $with_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content
      ## model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $self->{onerror}->(node => $el,
                         type => 'basetarget after hyperlink');
    }

    ## At least one of |href| and |target| is required.
    unless ($with_href or $with_target) {
      $self->{onerror}->(node => $el,
                         type => 'attribute missing:href|target');
    }

    ## Finally run the per-attribute checks and hand back their result.
    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    });
    return $attrs_checker->($self, $item, $element_state);
  },
};
1025    
## |link|: empty element; |href| and |rel| are both required.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Per-attribute syntax checks and status reporting.
    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: |title| has special semantics on |link| but is
      ## syntactically the same as the global |title| attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      charset => FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      target => FEATURE_M12N10_REC,
    });
    $attrs_checker->($self, $item, $element_state);

    my $el = $item->{node};
    if (not $el->has_attribute_ns (undef, 'href')) {
      $self->{onerror}->(node => $el,
                         type => 'attribute missing:href');
    } else {
      ## Remember that a hyperlink has been seen (|base| reads this flag).
      $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
    }
    if (not $el->has_attribute_ns (undef, 'rel')) {
      $self->{onerror}->(node => $el,
                         type => 'attribute missing:rel');
    }
  },
};
1064    
## |meta|: empty element.  The |check_attrs| hook
##   1. checks every attribute and reports its status,
##   2. enforces that exactly one of |name|, |http-equiv| and |charset| is
##      used and that |content| accompanies (only) the first two,
##   3. validates character encoding declarations (|charset| attribute or
##      |http-equiv="content-type"|).
$Element->{$HTML_NS}->{meta} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $name_attr;
    my $http_equiv_attr;
    my $charset_attr;
    my $content_attr;

    ## Status of each no-namespace attribute.  The table does not depend on
    ## the attribute being examined, so it is built once, not per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      charset => FEATURE_HTML5_DEFAULT,
      content => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      'http-equiv' => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scheme => FEATURE_M12N10_REC,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      ## |$checker| is a code reference, or the plain value 1 for the four
      ## attributes that are validated together after the loop.
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'content') {
          $content_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'name') {
          $name_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'http-equiv') {
          $http_equiv_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'charset') {
          $charset_attr = $attr;
          $checker = 1;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{''};
        }
      } else {
        $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{''};
      }
      if ($checker) {
        $checker->($self, $attr) if ref $checker;
      } elsif ($attr_ns eq '') {
        $self->{onerror}->(node => $attr, level => $self->{must_level},
                           type => 'attribute not defined');
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $attr_status->{$attr_ln});
      }
    }

    if (defined $name_attr) {
      ## |name| excludes both |http-equiv| and |charset|; each offending
      ## attribute is reported on its own.  (Previously |charset| was
      ## silently ignored whenever |http-equiv| was present as well.)
      if (defined $http_equiv_attr) {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'attribute not allowed');
      }
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $http_equiv_attr) {
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $charset_attr) {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
      }
    } else {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv');
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv|charset');
      }
    }

    ## Placement rules for an encoding declaration: the element has to be
    ## the first element child of the document's |head|, and the
    ## declaration is reported when the document is not an HTML document.
    my $check_charset_decl = sub {
      my $parent = $item->{node}->manakai_parent_element;
      if ($parent and $parent eq $parent->owner_document->manakai_head) {
        for my $el (@{$parent->child_nodes}) {
          next unless $el->node_type == 1; # ELEMENT_NODE
          unless ($el eq $item->{node}) {
            ## NOTE: Not the first child element.
            $self->{onerror}->(node => $item->{node},
                               type => 'element not allowed:meta charset',
                               level => $self->{must_level});
          }
          last; ## Only the first element child is examined.
          ## NOTE: Entity references are not supported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'element not allowed:meta charset',
                           level => $self->{must_level});
      }

      unless ($item->{node}->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $item->{node},
                           type => 'in XML:charset',
                           level => $self->{must_level});
      }
    }; # $check_charset_decl

    ## Validate the charset name |$charset_value| found on |$attr|.
    my $check_charset = sub {
      my ($attr, $charset_value) = @_;
      ## NOTE: Though the case-sensitivity of |charset| attribute value
      ## is not explicitly spelled in the HTML5 spec, the Character Set
      ## registry of IANA, which is referenced from HTML5 spec, says that
      ## charset name is case-insensitive.
      $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

      require Message::Charset::Info;
      my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
      my $ic = $item->{node}->owner_document->input_encoding;
      if (defined $ic) {
        ## TODO: Test for this case
        my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
        if ($charset ne $ic_charset) {
          $self->{onerror}->(node => $attr,
                             type => 'mismatched charset name:'.$ic.
                                 ':'.$charset_value, ## TODO: This should be a |value| value.
                             level => $self->{must_level});
        }
      } else {
        ## NOTE: MUST, but not checkable, since the document is not originally
        ## in serialized form (or the parser does not preserve the input
        ## encoding information).
        $self->{onerror}->(node => $attr,
                           type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
                           level => 'unsupported');
      }

      ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
      ## Syntactically valid and registered?  What about x-charset names?
      unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
                  ($charset_value)) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:syntax error:'.$charset_value, ## TODO
                           level => $self->{must_level});
      }

      if ($charset) {
        ## ISSUE: What is "the preferred name for that encoding" (for a charset
        ## with no "preferred MIME name" label)?
        my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
        if (($charset_status &
             Message::Charset::Info::PREFERRED_CHARSET_NAME ())
                != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
          $self->{onerror}->(node => $attr,
                             type => 'charset:not preferred:'.
                                 $charset_value, ## TODO
                             level => $self->{must_level});
        }
        if (($charset_status &
             Message::Charset::Info::REGISTERED_CHARSET_NAME ())
                != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
          if ($charset_value =~ /^x-/) {
            $self->{onerror}->(node => $attr,
                               type => 'charset:private:'.$charset_value, ## TODO
                               level => $self->{good_level});
          } else {
            $self->{onerror}->(node => $attr,
                               type => 'charset:not registered:'.
                                   $charset_value, ## TODO
                               level => $self->{good_level});
          }
        }
      } elsif ($charset_value =~ /^x-/) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:private:'.$charset_value, ## TODO
                           level => $self->{good_level});
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'charset:not registered:'.$charset_value, ## TODO
                           level => $self->{good_level});
      }

      if ($attr->get_user_data ('manakai_has_reference')) {
        $self->{onerror}->(node => $attr,
                           type => 'character reference in charset',
                           level => $self->{must_level});
      }
    }; # $check_charset

    ## TODO: metadata conformance

    ## TODO: pragma conformance
    if (defined $http_equiv_attr) { ## An enumerated attribute
      my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
      if ({
        'refresh' => 1,
        'default-style' => 1,
      }->{$keyword}) {
        #

        ## TODO: More than one occurrence is a MUST-level error (revision 1180).
      } elsif ($keyword eq 'content-type') {
        ## ISSUE: Though it is renamed as "Encoding declaration" state in rev
        ## 1221, there are still many occurrences of "Content-Type" state in
        ## the spec.

        $check_charset_decl->();
        if ($content_attr) {
          my $content = $content_attr->value;
          if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
            $check_charset->($content_attr, $1);
          } else {
            $self->{onerror}->(node => $content_attr,
                               type => 'meta content-type syntax error',
                               level => $self->{must_level});
          }
        }
      } else {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'enumerated:invalid');
      }
    }

    if (defined $charset_attr) {
      $check_charset_decl->();
      $check_charset->($charset_attr, $charset_attr->value);
    }
  },
};
1318    
## |style|: the text content is collected and, for |text/css| (also the
## default when |type| is missing), handed to |$self->{onsubdoc}| at the
## end of the element.  Other style languages are reported as unsupported.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    media => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    scoped => FEATURE_HTML5_DEFAULT,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Start with an empty buffer so that an element without any text
    ## child still yields a defined (empty) string in |check_end|.
    $element_state->{text} = '';

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $type = $item->{node}->get_attribute_ns (undef, 'type');
    if (not defined $type or
        $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      $element_state->{allow_element} = 0;
      $element_state->{style_type} = 'text/css';
    } else {
      $element_state->{allow_element} = 1; # unknown
      $element_state->{style_type} = $type; ## TODO: $type normalization
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{allow_element}) {
      ## Unknown style language: child elements are not rejected.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Accumulate the style sheet source text.
    $element_state->{text} .= $child_node->text_content;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{style_type} eq 'text/css') {
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => 'text/css', is_char_string => 1});
    } else {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'style:'.$element_state->{style_type});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1385 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1386 wakaba 1.1
## |body|: prose content.  Only the attribute status table is specific.
$Element->{$HTML_NS}->{body} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Attributes with an HTML5 status.
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    onload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    onunload => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    ## Attributes marked deprecated in XHTML m12n 1.0.
    alink => FEATURE_M12N10_REC_DEPRECATED,
    background => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    link => FEATURE_M12N10_REC_DEPRECATED,
    text => FEATURE_M12N10_REC_DEPRECATED,
    vlink => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
1404    
## |section|, |nav| and |article| share one shape: HTML5 status plus the
## prose content checker.  Each element gets its own hash.
for my $ln (qw/section nav article/) {
  $Element->{$HTML_NS}->{$ln} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLProseContentChecker,
  };
}
1419    
## |blockquote|: prose content; |cite| is checked as a URI.
$Element->{$HTML_NS}->{blockquote} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLProseContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    },
  ),
};
1432    
## |aside|: HTML5-only element with prose content.
$Element->{$HTML_NS}->{'aside'} = {
  'status' => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
};
1437    
## |h1|: phrasing content.  Starting a heading raises the |has_hn| flag
## (read by the |header| definition).
$Element->{$HTML_NS}->{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_start => sub {
    my ($self) = @_;
    $self->{flag}->{has_hn} = 1;
  },
};
1452    
## |h2| .. |h6| are shallow copies of the |h1| definition (one hash each).
for my $hn (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}->{$hn} = {%{$Element->{$HTML_NS}->{h1}}};
}
1462 wakaba 1.1
1463 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1464    
## |header|: prose content in which |header|, |footer| and sectioning
## content are excluded, and which must contain a heading.  The |has_hn|
## flag is cleared for the duration of the element and merged back with
## its previous value at the end.
$Element->{$HTML_NS}->{header} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $state) = @_;
    my $excluded = {$HTML_NS => {qw/header 1 footer 1/}};
    $self->_add_minus_elements ($state, $excluded, $HTMLSectioningContent);
    $state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->_remove_minus_elements ($state);
    if (not $self->{flag}->{has_hn}) {
      ## No heading was started inside this |header|.
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn');
    }
    ## Keep the flag set if it was set before this |header| started.
    $self->{flag}->{has_hn} ||= $state->{has_hn_original};

    $HTMLProseContentChecker{check_end}->(@_);
  },
  ## ISSUE: Is <header><del><h1>...</h1></del></header> conforming?
};
1489    
## |footer|: prose content without |footer|, sectioning content or heading
## content; the exclusions are lifted again when the element ends.
$Element->{$HTML_NS}->{footer} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $state) = @_;
    my $excluded = {$HTML_NS => {footer => 1}};
    $self->_add_minus_elements
        ($state, $excluded, $HTMLSectioningContent, $HTMLHeadingContent);
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->_remove_minus_elements ($state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1506    
## |address|: prose content without |footer|, |address|, sectioning content
## or heading content; the exclusions are lifted when the element ends.
$Element->{$HTML_NS}->{address} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;
    my $excluded = {$HTML_NS => {footer => 1, address => 1}};
    $self->_add_minus_elements
        ($state, $excluded, $HTMLSectioningContent, $HTMLHeadingContent);
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->_remove_minus_elements ($state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1528    
1529     $Element->{$HTML_NS}->{p} = { ## Checker definition for the HTML |p| element (phrasing content).
1530 wakaba 1.40 %HTMLPhrasingContentChecker,
1531 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1532     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1533     %HTMLAttrStatus,
1534     %HTMLM12NCommonAttrStatus,
1535     align => FEATURE_M12N10_REC_DEPRECATED,
1536 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1537 wakaba 1.49 }),
1538 wakaba 1.1 };
1539    
1540     $Element->{$HTML_NS}->{hr} = { ## Checker definition for the HTML |hr| element (uses the empty-element checker).
1541 wakaba 1.40 %HTMLEmptyChecker,
1542 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1543     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1544     %HTMLAttrStatus,
1545     %HTMLM12NCommonAttrStatus,
1546     align => FEATURE_M12N10_REC_DEPRECATED,
1547 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1548 wakaba 1.49 noshade => FEATURE_M12N10_REC_DEPRECATED,
1549     size => FEATURE_M12N10_REC_DEPRECATED,
1550     width => FEATURE_M12N10_REC_DEPRECATED,
1551     }),
1552 wakaba 1.1 };
1553    
1554     $Element->{$HTML_NS}->{br} = { ## Checker definition for the HTML |br| element (uses the empty-element checker).
1555 wakaba 1.40 %HTMLEmptyChecker,
1556 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1557     check_attrs => $GetHTMLAttrsChecker->({}, { ## Unlike most elements here, |%HTMLM12NCommonAttrStatus| is not spliced in; statuses are listed per attribute.
1558     %HTMLAttrStatus,
1559 wakaba 1.50 class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1560 wakaba 1.49 clear => FEATURE_M12N10_REC_DEPRECATED,
1561 wakaba 1.50 id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1562 wakaba 1.49 style => FEATURE_XHTML10_REC,
1563 wakaba 1.50 title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1564 wakaba 1.49 }),
1565 wakaba 1.29 ## NOTE: Blank lines MUST NOT be used for presentation purposes.
1566     ## (This requirement is semantic, so we cannot check it.)
1567 wakaba 1.1 };
1568    
1569     $Element->{$HTML_NS}->{dialog} = { ## Checker definition for the HTML |dialog| element: children must alternate |dt|, |dd|.
1570 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
1571 wakaba 1.40 %HTMLChecker,
1572     check_start => sub {
1573     my ($self, $item, $element_state) = @_;
1574     $element_state->{phase} = 'before dt'; ## Initial state: the next child is expected to be a |dt|.
1575     },
1576     check_child_element => sub {
1577     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1578     $child_is_transparent, $element_state) = @_;
1579     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { ## Child is currently registered as a minus element: report it.
1580     $self->{onerror}->(node => $child_el,
1581     type => 'element not allowed:minus',
1582     level => $self->{must_level});
1583     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { ## Child is registered as a plus element: accepted, phase unchanged.
1584     #
1585     } elsif ($element_state->{phase} eq 'before dt') {
1586     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1587     $element_state->{phase} = 'before dd';
1588     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') { ## |dd| with no preceding |dt|: report it, and keep expecting a |dt| next.
1589     $self->{onerror}
1590     ->(node => $child_el, type => 'ps element missing:dt');
1591     $element_state->{phase} = 'before dt';
1592     } else {
1593     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1594     }
1595     } elsif ($element_state->{phase} eq 'before dd') {
1596     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1597     $element_state->{phase} = 'before dt';
1598     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') { ## Two |dt|s in a row: report the missing |dd|, and keep expecting a |dd| next.
1599     $self->{onerror}
1600     ->(node => $child_el, type => 'ps element missing:dd');
1601     $element_state->{phase} = 'before dd';
1602     } else {
1603     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1604 wakaba 1.1 }
1605 wakaba 1.40 } else { ## Only the two phases above are ever assigned in this definition; anything else is an internal error.
1606     die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
1607     }
1608     },
1609     check_child_text => sub {
1610     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1611     if ($has_significant) { ## Text flagged as significant by the caller is not allowed directly inside |dialog|.
1612     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1613 wakaba 1.1 }
1614 wakaba 1.40 },
1615     check_end => sub {
1616     my ($self, $item, $element_state) = @_;
1617     if ($element_state->{phase} eq 'before dd') { ## The element ended right after a |dt|: its |dd| is missing.
1618     $self->{onerror}->(node => $item->{node},
1619     type => 'child element missing:dd');
1620 wakaba 1.1 }
1621 wakaba 1.40
1622     $HTMLChecker{check_end}->(@_); ## Then run the generic end check with the same arguments.
1623 wakaba 1.1 },
1624     };
1625    
1626     $Element->{$HTML_NS}->{pre} = { ## Checker definition for the HTML |pre| element (phrasing content).
1627 wakaba 1.40 %HTMLPhrasingContentChecker,
1628 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1629     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1630     %HTMLAttrStatus,
1631     %HTMLM12NCommonAttrStatus,
1632 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1633 wakaba 1.49 width => FEATURE_M12N10_REC_DEPRECATED,
1634     }),
1635 wakaba 1.1 };
1636    
1637     $Element->{$HTML_NS}->{ol} = { ## Checker definition for the HTML |ol| element: only |li| children are allowed.
1638 wakaba 1.40 %HTMLChecker,
1639 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1640 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({ ## First hash: element-specific attribute checkers; second hash: per-attribute status flags.
1641 wakaba 1.1 start => $HTMLIntegerAttrChecker,
1642 wakaba 1.49 }, {
1643     %HTMLAttrStatus,
1644     %HTMLM12NCommonAttrStatus,
1645     compact => FEATURE_M12N10_REC_DEPRECATED,
1646 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1647     start => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
1648 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
1649 wakaba 1.1 }),
1650 wakaba 1.40 check_child_element => sub {
1651     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1652     $child_is_transparent, $element_state) = @_;
1653     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { ## Child is currently registered as a minus element: report it.
1654     $self->{onerror}->(node => $child_el,
1655     type => 'element not allowed:minus',
1656     level => $self->{must_level});
1657     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { ## Child is registered as a plus element: accepted.
1658     #
1659     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') { ## HTML |li| is the only ordinary child accepted.
1660     #
1661     } else {
1662     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1663 wakaba 1.1 }
1664 wakaba 1.40 },
1665     check_child_text => sub {
1666     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1667     if ($has_significant) { ## Text flagged as significant by the caller is not allowed directly inside the list.
1668     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1669 wakaba 1.1 }
1670     },
1671     };
1672    
1673     $Element->{$HTML_NS}->{ul} = { ## Checker definition for the HTML |ul| element, derived from the |ol| definition.
1674 wakaba 1.40 %{$Element->{$HTML_NS}->{ol}}, ## Copies every entry of the |ol| definition; |status| and |check_attrs| are then replaced below.
1675 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1676     check_attrs => $GetHTMLAttrsChecker->({}, { ## Unlike |ol|, no |start| checker or status entry is listed.
1677     %HTMLAttrStatus,
1678     %HTMLM12NCommonAttrStatus,
1679     compact => FEATURE_M12N10_REC_DEPRECATED,
1680 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1681 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
1682     }),
1683 wakaba 1.1 };
1684    
1685     $Element->{$HTML_NS}->{li} = { ## Checker definition for the HTML |li| element.
1686 wakaba 1.40 %HTMLProseContentChecker,
1687 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1688 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
1689 wakaba 1.49 value => sub { ## |value| checker: reports the attribute unless the parent is an HTML |ol|, then validates it as an integer.
1690 wakaba 1.1 my ($self, $attr) = @_;
1691     my $parent = $attr->owner_element->manakai_parent_element;
1692     if (defined $parent) { ## With no parent element, the parent test is skipped entirely.
1693     my $parent_ns = $parent->namespace_uri;
1694     $parent_ns = '' unless defined $parent_ns; ## Normalize a null namespace to '' so the |eq| below never sees undef.
1695     my $parent_ln = $parent->manakai_local_name;
1696     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1697     $self->{onerror}->(node => $attr, level => 'unsupported',
1698     type => 'attribute');
1699     }
1700     }
1701     $HTMLIntegerAttrChecker->($self, $attr); ## Runs regardless of the outcome of the parent test.
1702 wakaba 1.49 }, ## TODO: test
1703     }, {
1704     %HTMLAttrStatus,
1705     %HTMLM12NCommonAttrStatus,
1706 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1707 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
1708 wakaba 1.50 value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC_DEPRECATED,
1709 wakaba 1.1 }),
1710 wakaba 1.40 check_child_element => sub {
1711     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1712     $child_is_transparent, $element_state) = @_;
1713     if ($self->{flag}->{in_menu}) { ## When the |in_menu| flag is set, children are checked as phrasing content instead of prose content.
1714     $HTMLPhrasingContentChecker{check_child_element}->(@_);
1715     } else {
1716     $HTMLProseContentChecker{check_child_element}->(@_);
1717     }
1718     },
1719     check_child_text => sub {
1720     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1721     if ($self->{flag}->{in_menu}) { ## Same |in_menu| dispatch as for child elements.
1722     $HTMLPhrasingContentChecker{check_child_text}->(@_);
1723 wakaba 1.1 } else {
1724 wakaba 1.40 $HTMLProseContentChecker{check_child_text}->(@_);
1725 wakaba 1.1 }
1726     },
1727     };
1728    
1729     $Element->{$HTML_NS}->{dl} = { ## Checker definition for the HTML |dl| element: groups of one or more |dt| followed by one or more |dd|.
1730 wakaba 1.40 %HTMLChecker,
1731 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1732     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1733     %HTMLAttrStatus,
1734     %HTMLM12NCommonAttrStatus,
1735     compact => FEATURE_M12N10_REC_DEPRECATED,
1736 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1737 wakaba 1.49 type => FEATURE_M12N10_REC_DEPRECATED,
1738     }),
1739 wakaba 1.40 check_start => sub {
1740     my ($self, $item, $element_state) = @_;
1741     $element_state->{phase} = 'before dt'; ## Initial state: no |dt| or |dd| seen yet.
1742     },
1743     check_child_element => sub {
1744     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1745     $child_is_transparent, $element_state) = @_;
1746     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { ## Child is currently registered as a minus element: report it.
1747     $self->{onerror}->(node => $child_el,
1748     type => 'element not allowed:minus',
1749     level => $self->{must_level});
1750     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { ## Child is registered as a plus element: accepted, phase unchanged.
1751     #
1752     } elsif ($element_state->{phase} eq 'in dds') { ## After at least one |dd|: another |dd| continues the group, a |dt| starts a new one.
1753     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1754     #$element_state->{phase} = 'in dds';
1755     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1756     $element_state->{phase} = 'in dts';
1757     } else {
1758     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1759     }
1760     } elsif ($element_state->{phase} eq 'in dts') { ## After at least one |dt|: more |dt|s may follow, a |dd| moves on to the description part.
1761     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1762     #$element_state->{phase} = 'in dts';
1763     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1764     $element_state->{phase} = 'in dds';
1765     } else {
1766     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1767     }
1768     } elsif ($element_state->{phase} eq 'before dt') {
1769     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1770     $element_state->{phase} = 'in dts';
1771     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') { ## Leading |dd| with no |dt|: report it, then continue as if inside a |dd| run.
1772     $self->{onerror}
1773     ->(node => $child_el, type => 'ps element missing:dt');
1774     $element_state->{phase} = 'in dds';
1775     } else {
1776     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1777 wakaba 1.1 }
1778 wakaba 1.40 } else { ## Only the three phases above are ever assigned in this definition; anything else is an internal error.
1779     die "check_child_element: Bad |dl| phase: $element_state->{phase}";
1780 wakaba 1.1 }
1781 wakaba 1.40 },
1782     check_child_text => sub {
1783     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1784     if ($has_significant) { ## Text flagged as significant by the caller is not allowed directly inside |dl|.
1785     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1786     }
1787     },
1788     check_end => sub {
1789     my ($self, $item, $element_state) = @_;
1790     if ($element_state->{phase} eq 'in dts') { ## The element ended after a |dt| run with no |dd|.
1791     $self->{onerror}->(node => $item->{node},
1792     type => 'child element missing:dd');
1793 wakaba 1.1 }
1794    
1795 wakaba 1.40 $HTMLChecker{check_end}->(@_); ## Then run the generic end check with the same arguments.
1796 wakaba 1.1 },
1797     };
1798    
1799     $Element->{$HTML_NS}->{dt} = { ## Checker definition for the HTML |dt| element (phrasing content).
1800 wakaba 1.40 %HTMLPhrasingContentChecker,
1801 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1802     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1803     %HTMLAttrStatus,
1804     %HTMLM12NCommonAttrStatus,
1805 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1806 wakaba 1.49 }),
1807 wakaba 1.1 };
1808    
1809     $Element->{$HTML_NS}->{dd} = { ## Checker definition for the HTML |dd| element (prose content).
1810 wakaba 1.40 %HTMLProseContentChecker,
1811 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1812     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1813     %HTMLAttrStatus,
1814     %HTMLM12NCommonAttrStatus,
1815 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1816 wakaba 1.49 }),
1817 wakaba 1.1 };
1818    
1819     $Element->{$HTML_NS}->{a} = { ## Checker definition for the HTML |a| element.
1820 wakaba 1.40 %HTMLPhrasingContentChecker,
1821 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1822 wakaba 1.40 check_attrs => sub { ## Hand-written attribute check: the link attributes are only allowed when |href| is present.
1823     my ($self, $item, $element_state) = @_;
1824 wakaba 1.1 my %attr; ## Link-related attributes actually present, keyed by local name.
1825 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
1826 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
1827     $attr_ns = '' unless defined $attr_ns; ## Normalize a null namespace to '' for the |eq| tests and hash lookups below.
1828     my $attr_ln = $attr->manakai_local_name;
1829     my $checker;
1830     if ($attr_ns eq '') {
1831     $checker = { ## Element-specific checkers for null-namespace attributes.
1832     target => $HTMLTargetAttrChecker,
1833     href => $HTMLURIAttrChecker,
1834     ping => $HTMLSpaceURIsAttrChecker,
1835 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
1836 wakaba 1.1 media => $HTMLMQAttrChecker,
1837     hreflang => $HTMLLanguageTagAttrChecker,
1838     type => $HTMLIMTAttrChecker,
1839     }->{$attr_ln};
1840     if ($checker) {
1841     $attr{$attr_ln} = $attr; ## Remember it for the |href| dependency check after the loop.
1842     } else {
1843     $checker = $HTMLAttrChecker->{$attr_ln}; ## Fall back to the common HTML attribute checkers.
1844     }
1845     }
1846     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} ## Last resort: namespace-specific checker, then that namespace's '' entry.
1847     || $AttrChecker->{$attr_ns}->{''};
1848     if ($checker) {
1849     $checker->($self, $attr) if ref $checker; ## A true non-reference entry means "known attribute, nothing to check".
1850 wakaba 1.49 } elsif ($attr_ns eq '') {
1851     $self->{onerror}->(node => $attr, level => $self->{must_level},
1852     type => 'attribute not defined');
1853 wakaba 1.1 } else {
1854     $self->{onerror}->(node => $attr, level => 'unsupported',
1855     type => 'attribute');
1856 wakaba 1.50 ## ISSUE: No conformance criteria for unknown attributes in the spec
1857 wakaba 1.1 }
1858 wakaba 1.49
1859     if ($attr_ns eq '') { ## Report the standardization status of null-namespace attributes; unlisted names pass undef.
1860     $self->_attr_status_info ($attr, {
1861     %HTMLAttrStatus,
1862     %HTMLM12NCommonAttrStatus,
1863     accesskey => FEATURE_M12N10_REC,
1864     charset => FEATURE_M12N10_REC,
1865     coords => FEATURE_M12N10_REC,
1866 wakaba 1.50 href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1867     hreflang => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1868     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1869     media => FEATURE_HTML5_DEFAULT,
1870 wakaba 1.49 name => FEATURE_M12N10_REC_DEPRECATED,
1871 wakaba 1.50 onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1872     onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1873     ping => FEATURE_HTML5_DEFAULT,
1874     rel => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1875 wakaba 1.49 rev => FEATURE_M12N10_REC,
1876     shape => FEATURE_M12N10_REC,
1877 wakaba 1.50 tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1878     target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1879     type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1880 wakaba 1.49 }->{$attr_ln});
1881     }
1882 wakaba 1.1 }
1883    
1884 wakaba 1.40 $element_state->{in_a_href_original} = $self->{flag}->{in_a_href}; ## Saved so that |check_end| can tell whether an outer |a href| set the flag.
1885 wakaba 1.4 if (defined $attr{href}) {
1886     $self->{has_hyperlink_element} = 1;
1887 wakaba 1.40 $self->{flag}->{in_a_href} = 1;
1888 wakaba 1.4 } else {
1889 wakaba 1.1 for (qw/target ping rel media hreflang type/) { ## Without |href|, none of these attributes may be specified.
1890     if (defined $attr{$_}) {
1891     $self->{onerror}->(node => $attr{$_},
1892     type => 'attribute not allowed');
1893     }
1894     }
1895     }
1896     },
1897 wakaba 1.40 check_start => sub {
1898     my ($self, $item, $element_state) = @_;
1899     $self->_add_minus_elements ($element_state, $HTMLInteractiveContent); ## Registers the interactive-content set as minus elements.
1900     },
1901     check_end => sub {
1902     my ($self, $item, $element_state) = @_;
1903     $self->_remove_minus_elements ($element_state); ## Presumably reverts the |_add_minus_elements| call made in |check_start|.
     ## Clear the |in_a_href| flag unless it was already set when this
     ## element was entered; otherwise it would stay set for everything
     ## that follows the first |a href| in the document.
     delete $self->{flag}->{in_a_href}
         unless $element_state->{in_a_href_original};
1904 wakaba 1.1
1905 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_); ## Then run the generic phrasing-content end check.
1906 wakaba 1.1 },
1907     };
1908    
1909     $Element->{$HTML_NS}->{q} = { ## Checker definition for the HTML |q| element (phrasing content).
1910 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1911 wakaba 1.40 %HTMLPhrasingContentChecker, ## NOTE(review): spliced in after |status|, so a |status| key inside it (if any) would win -- confirm.
1912     check_attrs => $GetHTMLAttrsChecker->({ ## First hash: element-specific attribute checkers; second hash: per-attribute status flags.
1913 wakaba 1.50 cite => $HTMLURIAttrChecker,
1914     }, {
1915 wakaba 1.49 %HTMLAttrStatus,
1916     %HTMLM12NCommonAttrStatus,
1917 wakaba 1.50 cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1918     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1919 wakaba 1.1 }),
1920     };
1921    
1922     $Element->{$HTML_NS}->{cite} = { ## Checker definition for the HTML |cite| element (phrasing content).
1923 wakaba 1.40 %HTMLPhrasingContentChecker,
1924 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1925     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1926     %HTMLAttrStatus,
1927     %HTMLM12NCommonAttrStatus,
1928 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1929 wakaba 1.49 }),
1930 wakaba 1.1 };
1931    
1932     $Element->{$HTML_NS}->{em} = { ## Checker definition for the HTML |em| element (phrasing content).
1933 wakaba 1.40 %HTMLPhrasingContentChecker,
1934 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1935     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1936     %HTMLAttrStatus,
1937     %HTMLM12NCommonAttrStatus,
1938 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1939 wakaba 1.49 }),
1940 wakaba 1.1 };
1941    
1942     $Element->{$HTML_NS}->{strong} = { ## Checker definition for the HTML |strong| element (phrasing content).
1943 wakaba 1.40 %HTMLPhrasingContentChecker,
1944 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1945     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1946     %HTMLAttrStatus,
1947     %HTMLM12NCommonAttrStatus,
1948 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1949 wakaba 1.49 }),
1950 wakaba 1.1 };
1951    
1952     $Element->{$HTML_NS}->{small} = { ## Checker definition for the HTML |small| element (phrasing content).
1953 wakaba 1.40 %HTMLPhrasingContentChecker,
1954 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1955     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1956     %HTMLAttrStatus,
1957     %HTMLM12NCommonAttrStatus,
1958 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1959 wakaba 1.49 }),
1960 wakaba 1.1 };
1961    
1962 wakaba 1.38 $Element->{$HTML_NS}->{mark} = { ## Checker definition for the HTML |mark| element; no |check_attrs| is set here, so whatever the spliced-in checker provides applies.
1963 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
1964 wakaba 1.40 %HTMLPhrasingContentChecker, ## NOTE(review): spliced in after |status|, so a |status| key inside it (if any) would win -- confirm.
1965 wakaba 1.1 };
1966    
1967     $Element->{$HTML_NS}->{dfn} = { ## Checker definition for the HTML |dfn| element; also detects duplicate defining terms.
1968 wakaba 1.40 %HTMLPhrasingContentChecker,
1969 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
1970     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
1971     %HTMLAttrStatus,
1972     %HTMLM12NCommonAttrStatus,
1973 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
1974 wakaba 1.49 }),
1975 wakaba 1.40 check_start => sub {
1976     my ($self, $item, $element_state) = @_;
1977     $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}}); ## Registers |dfn| itself as a minus element.
1978 wakaba 1.1
1979 wakaba 1.40 my $node = $item->{node};
1980 wakaba 1.1 my $term = $node->get_attribute_ns (undef, 'title'); ## 1st choice: the element's own |title| attribute.
1981     unless (defined $term) { ## 2nd choice: |title| of an HTML |abbr| that is the only element child, with no non-whitespace text siblings.
1982     for my $child (@{$node->child_nodes}) {
1983     if ($child->node_type == 1) { # ELEMENT_NODE
1984     if (defined $term) { ## A second element child: the "only child" condition fails.
1985     undef $term;
1986     last;
1987     } elsif ($child->manakai_local_name eq 'abbr') {
1988     my $nsuri = $child->namespace_uri;
1989     if (defined $nsuri and $nsuri eq $HTML_NS) {
1990     my $attr = $child->get_attribute_node_ns (undef, 'title');
1991     if ($attr) {
1992     $term = $attr->value;
1993     }
1994     }
1995     }
     ## An element child that did not supply a term (not an HTML
     ## |abbr|, or an |abbr| without |title|) also breaks the "only
     ## child is |abbr| with |title|" condition.  Stop here so that a
     ## later |abbr| sibling cannot be picked up by mistake.
     last unless defined $term;
1996     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1997     ## TEXT_NODE or CDATA_SECTION_NODE
1998     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1999     next;
2000     }
2001     undef $term; ## Non-whitespace text next to the |abbr|: fall back to the text content.
2002     last;
2003     }
2004     }
2005     unless (defined $term) { ## 3rd choice: the text content of the |dfn| element.
2006     $term = $node->text_content;
2007     }
2008     }
2009     if ($self->{term}->{$term}) { ## |$self->{term}| maps each term seen so far to the list of |dfn| nodes defining it.
2010     $self->{onerror}->(node => $node, type => 'duplicate term');
2011     push @{$self->{term}->{$term}}, $node;
2012     } else {
2013     $self->{term}->{$term} = [$node];
2014     }
2015     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
2016     ## has |title|.
2017 wakaba 1.40 },
2018     check_end => sub {
2019     my ($self, $item, $element_state) = @_;
2020     $self->_remove_minus_elements ($element_state); ## Presumably reverts the |_add_minus_elements| call made in |check_start|.
2021 wakaba 1.1
2022 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_); ## Then run the generic phrasing-content end check.
2023 wakaba 1.1 },
2024     };
2025    
2026     $Element->{$HTML_NS}->{abbr} = { ## Checker definition for the HTML |abbr| element (phrasing content).
2027 wakaba 1.40 %HTMLPhrasingContentChecker,
2028 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2029     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2030     %HTMLAttrStatus,
2031     %HTMLM12NCommonAttrStatus,
2032 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2033 wakaba 1.49 }),
2034     };
2035    
2036     $Element->{$HTML_NS}->{acronym} = { ## Checker definition for the HTML |acronym| element (phrasing content).
2037     %HTMLPhrasingContentChecker,
2038     status => FEATURE_M12N10_REC, ## No HTML5 flag here, unlike |abbr|.
2039     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2040     %HTMLAttrStatus,
2041     %HTMLM12NCommonAttrStatus,
2042 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2043 wakaba 1.49 }),
2044 wakaba 1.1 };
2045    
2046     $Element->{$HTML_NS}->{time} = { ## Checker definition for the HTML |time| element; validates its date-or-time string.
2047 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
2048 wakaba 1.40 %HTMLPhrasingContentChecker, ## NOTE(review): spliced in after |status|, so a |status| key inside it (if any) would win -- confirm.
2049     check_attrs => $GetHTMLAttrsChecker->({
2050 wakaba 1.1 datetime => sub { 1 }, # checked in |checker|
2051 wakaba 1.49 }, {
2052     %HTMLAttrStatus,
2053     %HTMLM12NCommonAttrStatus,
2054 wakaba 1.50 datetime => FEATURE_HTML5_DEFAULT,
2055 wakaba 1.1 }),
2056     ## TODO: Write tests
2057 wakaba 1.40 check_end => sub { ## Parses the |datetime| attribute if present, otherwise the element's text content.
2058     my ($self, $item, $element_state) = @_;
2059 wakaba 1.1
2060 wakaba 1.40 my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
2061 wakaba 1.1 my $input; ## String to be parsed.
2062     my $reg_sp; ## Pattern for the skippable whitespace; depends on where the string came from.
2063     my $input_node; ## Node that errors are reported against.
2064     if ($attr) {
2065     $input = $attr->value;
2066     $reg_sp = qr/[\x09-\x0D\x20]*/; ## In an attribute: U+0009..U+000D and U+0020.
2067     $input_node = $attr;
2068     } else {
2069 wakaba 1.40 $input = $item->{node}->text_content;
2070 wakaba 1.1 $reg_sp = qr/\p{Zs}*/; ## In content: Unicode space separators (Zs).
2071 wakaba 1.40 $input_node = $item->{node};
2072 wakaba 1.1
2073     ## ISSUE: What is the definition for "successfully extracts a date
2074     ## or time"? If the algorithm says the string is invalid but
2075     ## return some date or time, is it "successfully"?
2076     }
2077    
2078     my $hour;
2079     my $minute;
2080     my $second;
     ## NOTE: Every whitespace position in the pattern below interpolates
     ## |$reg_sp|, so that the whitespace rule selected above for the
     ## attribute or the content form is the one actually applied.
2081     if ($input =~ /
2082     \A
2083     $reg_sp
2084     ([0-9]+) # 1
2085     (?>
2086     -([0-9]+) # 2
2087     -([0-9]+) # 3
2088     $reg_sp
2089     (?>
2090     T
2091     $reg_sp
2092     )?
2093     ([0-9]+) # 4
2094     :([0-9]+) # 5
2095     (?>
2096     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
2097     )?
2098     $reg_sp
2099     (?>
2100     Z
2101     $reg_sp
2102     |
2103     [+-]([0-9]+):([0-9]+) # 7, 8
2104     $reg_sp
2105     )?
2106     \z
2107     |
2108     :([0-9]+) # 9
2109     (?>
2110     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
2111     )?
2112     $reg_sp\z
2113     )
2114     /x) {
2115     if (defined $2) { ## YYYY-MM-DD T? hh:mm
2116     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
2117     length $4 != 2 or length $5 != 2) {
2118     $self->{onerror}->(node => $input_node,
2119     type => 'dateortime:syntax error');
2120     }
2121    
2122     if (1 <= $2 and $2 <= 12) {
2123     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
2124     if $3 < 1 or
2125     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2]; ## Days per month, indexed by month number; February is 29 here.
2126     $self->{onerror}->(node => $input_node, type => 'datetime:bad day') ## February 29 is only valid in a leap year.
2127     if $2 == 2 and $3 == 29 and
2128     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
2129     } else {
2130     $self->{onerror}->(node => $input_node,
2131     type => 'datetime:bad month');
2132     }
2133    
2134     ($hour, $minute, $second) = ($4, $5, $6);
2135    
2136     if (defined $7) { ## [+-]hh:mm
2137     if (length $7 != 2 or length $8 != 2) {
2138     $self->{onerror}->(node => $input_node,
2139     type => 'dateortime:syntax error');
2140     }
2141    
2142     $self->{onerror}->(node => $input_node,
2143     type => 'datetime:bad timezone hour')
2144     if $7 > 23;
2145     $self->{onerror}->(node => $input_node,
2146     type => 'datetime:bad timezone minute')
2147     if $8 > 59;
2148     }
2149     } else { ## hh:mm
2150     if (length $1 != 2 or length $9 != 2) {
2151     $self->{onerror}->(node => $input_node,
2152     type => qq'dateortime:syntax error');
2153     }
2154    
2155     ($hour, $minute, $second) = ($1, $9, $10);
2156     }
2157    
2158     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
2159     if $hour > 23;
2160     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
2161     if $minute > 59;
2162    
2163     if (defined $second) { ## s
2164     ## NOTE: The integer part of the second does not have to have a length of two.
2165    
2166     if (substr ($second, 0, 1) eq '.') { ## A seconds value with no integer part is a syntax error.
2167     $self->{onerror}->(node => $input_node,
2168     type => 'dateortime:syntax error');
2169     }
2170    
2171     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
2172     if $second >= 60;
2173     }
2174     } else { ## The string matches neither the date-and-time form nor the time-only form.
2175     $self->{onerror}->(node => $input_node,
2176     type => 'dateortime:syntax error');
2177     }
2178    
2179 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_); ## Then run the generic phrasing-content end check.
2180 wakaba 1.1 },
2181     };
2182    
2183     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
2184 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
2185 wakaba 1.40 %HTMLPhrasingContentChecker, ## NOTE(review): spliced in after |status|, so a |status| key inside it (if any) would win -- confirm.
2186     check_attrs => $GetHTMLAttrsChecker->({ ## All six attributes use the floating-point checker; the |sub { 1 }| argument is presumably an always-true value predicate -- confirm.
2187 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2188     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2189     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2190     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2191     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2192     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
2193 wakaba 1.50 }, {
2194     %HTMLAttrStatus, ## |%HTMLM12NCommonAttrStatus| is not spliced in for this HTML5-only element.
2195     high => FEATURE_HTML5_DEFAULT,
2196     low => FEATURE_HTML5_DEFAULT,
2197     max => FEATURE_HTML5_DEFAULT,
2198     min => FEATURE_HTML5_DEFAULT,
2199     optimum => FEATURE_HTML5_DEFAULT,
2200     value => FEATURE_HTML5_DEFAULT,
2201 wakaba 1.1 }),
2202     };
2203    
2204     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
2205 wakaba 1.48 status => FEATURE_HTML5_DEFAULT,
2206 wakaba 1.40 %HTMLPhrasingContentChecker, ## NOTE(review): spliced in after |status|, so a |status| key inside it (if any) would win -- confirm.
2207     check_attrs => $GetHTMLAttrsChecker->({ ## The subs passed below test their first argument: |value| >= 0 and |max| > 0 (presumably the parsed number -- confirm).
2208 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
2209     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
2210 wakaba 1.50 }, {
2211     %HTMLAttrStatus,
2212     max => FEATURE_HTML5_DEFAULT,
2213     value => FEATURE_HTML5_DEFAULT,
2214 wakaba 1.1 }),
2215     };
2216    
2217     $Element->{$HTML_NS}->{code} = { ## Checker definition for the HTML |code| element (phrasing content).
2218 wakaba 1.40 %HTMLPhrasingContentChecker,
2219 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2220     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2221     %HTMLAttrStatus,
2222     %HTMLM12NCommonAttrStatus,
2223 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2224 wakaba 1.49 }),
2225 wakaba 1.1 };
2226    
2227     $Element->{$HTML_NS}->{var} = { ## Checker definition for the HTML |var| element (phrasing content).
2228 wakaba 1.40 %HTMLPhrasingContentChecker,
2229 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2230     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2231     %HTMLAttrStatus,
2232     %HTMLM12NCommonAttrStatus,
2233 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2234 wakaba 1.49 }),
2235 wakaba 1.1 };
2236    
2237     $Element->{$HTML_NS}->{samp} = { ## Checker definition for the HTML |samp| element (phrasing content).
2238 wakaba 1.40 %HTMLPhrasingContentChecker,
2239 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2240     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2241     %HTMLAttrStatus,
2242     %HTMLM12NCommonAttrStatus,
2243 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2244 wakaba 1.49 }),
2245 wakaba 1.1 };
2246    
2247     $Element->{$HTML_NS}->{kbd} = { ## Checker definition for the HTML |kbd| element (phrasing content).
2248 wakaba 1.40 %HTMLPhrasingContentChecker,
2249 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2250     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2251     %HTMLAttrStatus,
2252     %HTMLM12NCommonAttrStatus,
2253 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2254 wakaba 1.49 }),
2255 wakaba 1.1 };
2256    
2257     $Element->{$HTML_NS}->{sub} = { ## Checker definition for the HTML |sub| element (phrasing content).
2258 wakaba 1.40 %HTMLPhrasingContentChecker,
2259 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2260     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2261     %HTMLAttrStatus,
2262     %HTMLM12NCommonAttrStatus,
2263 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2264 wakaba 1.49 }),
2265 wakaba 1.1 };
2266    
2267     $Element->{$HTML_NS}->{sup} = { ## Checker definition for the HTML |sup| element (phrasing content).
2268 wakaba 1.40 %HTMLPhrasingContentChecker,
2269 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2270     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2271     %HTMLAttrStatus,
2272     %HTMLM12NCommonAttrStatus,
2273 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2274 wakaba 1.49 }),
2275 wakaba 1.1 };
2276    
2277     $Element->{$HTML_NS}->{span} = { ## Checker definition for the HTML |span| element (phrasing content).
2278 wakaba 1.40 %HTMLPhrasingContentChecker,
2279 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2280     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2281     %HTMLAttrStatus,
2282     %HTMLM12NCommonAttrStatus,
2283     datafld => FEATURE_HTML4_REC_RESERVED, ## The three |data*| attributes carry the "HTML4 REC reserved" status.
2284     dataformatas => FEATURE_HTML4_REC_RESERVED,
2285     datasrc => FEATURE_HTML4_REC_RESERVED,
2286 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2287 wakaba 1.49 }),
2288 wakaba 1.1 };
2289    
2290     $Element->{$HTML_NS}->{i} = { ## Checker definition for the HTML |i| element (phrasing content).
2291 wakaba 1.40 %HTMLPhrasingContentChecker,
2292 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2293     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2294     %HTMLAttrStatus,
2295     %HTMLM12NCommonAttrStatus,
2296 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2297 wakaba 1.49 }),
2298 wakaba 1.1 };
2299    
2300     $Element->{$HTML_NS}->{b} = { ## Checker definition for the HTML |b| element (phrasing content).
2301 wakaba 1.40 %HTMLPhrasingContentChecker,
2302 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2303     check_attrs => $GetHTMLAttrsChecker->({}, { ## No element-specific attribute checkers; the hash below is the attribute status table.
2304     %HTMLAttrStatus,
2305     %HTMLM12NCommonAttrStatus,
2306 wakaba 1.50 lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2307 wakaba 1.49 }),
2308 wakaba 1.1 };
2309    
2310     $Element->{$HTML_NS}->{bdo} = { ## Checker definition for the HTML |bdo| element: the |dir| attribute is required.
2311 wakaba 1.40 %HTMLPhrasingContentChecker,
2312 wakaba 1.49 status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2313 wakaba 1.40 check_attrs => sub {
2314     my ($self, $item, $element_state) = @_;
2315 wakaba 1.49 $GetHTMLAttrsChecker->({}, { ## Build the generic attribute checker and run it immediately; this happens on every call of this callback.
2316     %HTMLAttrStatus,
2317 wakaba 1.50 class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2318     dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2319     id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2320 wakaba 1.49 style => FEATURE_XHTML10_REC,
2321 wakaba 1.50 title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
2322     lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
2323 wakaba 1.49 })->($self, $item, $element_state);
2324 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'dir')) { ## Additional element-specific rule: a null-namespace |dir| attribute must be present.
2325     $self->{onerror}->(node => $item->{node},
2326     type => 'attribute missing:dir');
2327 wakaba 1.1 }
2328     },
2329     ## ISSUE: The spec does not directly say that |dir| is an enumerated attr.
2330     };
2331    
2332 wakaba 1.49 ## TODO: big, tt: Common lang(xhtml10)
2333    
2334 wakaba 1.29 =pod
2335    
2336     ## TODO:
2337    
2338     +
2339     + <p>Partly because of the confusion described above, authors are
2340     + strongly recommended to always mark up all paragraphs with the
2341     + <code>p</code> element, and to not have any <code>ins</code> or
2342     + <code>del</code> elements that cross across any <span
2343     + title="paragraph">implied paragraphs</span>.</p>
2344     +
2345     (An informative note)
2346    
2347     <p><code>ins</code> elements should not cross <span
2348     + title="paragraph">implied paragraph</span> boundaries.</p>
2349     (normative)
2350    
2351     + <p><code>del</code> elements should not cross <span
2352     + title="paragraph">implied paragraph</span> boundaries.</p>
2353     (normative)
2354    
2355     =cut
2356    
## HTML |ins| element.  Content hooks come from |%HTMLTransparentChecker|.
## |cite| and |datetime| get dedicated value checkers; the second table
## maps attribute names to FEATURE_* status flags.
{
  my %ins_attr_checker = (
    cite => $HTMLURIAttrChecker,
    datetime => $HTMLDatetimeAttrChecker,
  );
  my %ins_attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{ins} = {
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%ins_attr_checker,
                                          \%ins_attr_status),
  };
}
2371    
## HTML |del| element.  Same attribute handling as |ins|, plus a
## |check_end| hook that reports 'no significant content' at the
## "should" level.
{
  my %del_attr_checker = (
    cite => $HTMLURIAttrChecker,
    datetime => $HTMLDatetimeAttrChecker,
  );
  my %del_attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    cite => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    datetime => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  );

  $Element->{$HTML_NS}{del} = {
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->(\%del_attr_checker,
                                          \%del_attr_status),
    check_end => sub {
      my ($self, $item, $element_state) = @_;

      ## Nothing to report when the element has significant content or
      ## when the item is flagged as transparent.
      ## NOTE: The significantness flag is not propagated to the parent.
      unless ($element_state->{has_significant} or $item->{transparent}) {
        $self->{onerror}->(node => $item->{node},
                           level => $self->{should_level},
                           type => 'no significant content');
      }
    },
  };
}
2398    
## HTML |figure| element.
##
## Content model: (legend, Prose) | (Prose, legend).
##
## State kept in |$element_state|:
##   has_legend_at_first - true once a |legend| was seen before any other
##                         content.
##   has_legend          - the |legend| element node seen after other
##                         content (the candidate trailing legend).
##   has_non_legend      - true when non-|legend| content has been seen
##                         since the last |legend|.
$Element->{$HTML_NS}{figure} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_FD,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## All misplaced-legend reports share the same type and level.
    my $report_legend = sub {
      my $legend_node = shift;
      $self->{onerror}->(node => $legend_node,
                         type => 'element not allowed:figure legend',
                         level => $self->{must_level});
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to record.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      if ($element_state->{has_legend_at_first}) {
        ## A leading legend exists already: this one is extra.
        $report_legend->($child_el);
      } elsif ($element_state->{has_legend}) {
        ## A later legend supersedes the earlier candidate, which is
        ## therefore the misplaced one.
        $report_legend->($element_state->{has_legend});
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        $element_state->{has_legend} = $child_el;
      } else {
        $element_state->{has_legend_at_first} = 1;
      }
      delete $element_state->{has_non_legend};
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    if ($element_state->{has_legend_at_first}) {
      ## Leading legend: OK.
    } elsif ($element_state->{has_legend}) {
      ## A trailing legend must not be followed by other content.
      if ($element_state->{has_non_legend}) {
        $self->{onerror}->(node => $element_state->{has_legend},
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
      }
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
2461 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2462 wakaba 1.1
## HTML |img| element (empty content model).
##
## After the generic attribute check, a missing |alt| is reported at the
## "should" level and a missing |src| at the default level.
$Element->{$HTML_NS}{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checker = {
      alt => sub { }, ## NOTE: No syntactical requirement
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        ## |ismap| is reported unless the |in_a_href| flag is set.
        unless ($self->{flag}->{in_a_href}) {
          $self->{onerror}->(node => $attr,
                             type => 'attribute not allowed:ismap');
        }
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      ## TODO: height
      ## TODO: width
    };
    my $attr_status = {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    };
    $GetHTMLAttrsChecker->($attr_checker, $attr_status)->($self, $item);

    my $node = $item->{node};
    unless ($node->has_attribute_ns (undef, 'alt')) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:alt',
                         level => $self->{should_level});
    }
    unless ($node->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:src');
    }
  },
};
2510    
## HTML |iframe| element.  Content hooks come from |%HTMLTextChecker|;
## only |src| has a dedicated value checker.
## NOTE: Not part of M12N10 Strict.
{
  my %iframe_attr_status = (
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_XHTML10_REC,
    class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    frameborder => FEATURE_M12N10_REC,
    height => FEATURE_M12N10_REC,
    id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    longdesc => FEATURE_M12N10_REC,
    marginheight => FEATURE_M12N10_REC,
    marginwidth => FEATURE_M12N10_REC,
    name => FEATURE_M12N10_REC_DEPRECATED,
    scrolling => FEATURE_M12N10_REC,
    src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  );

  $Element->{$HTML_NS}{iframe} = {
    %HTMLTextChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    check_attrs => $GetHTMLAttrsChecker->({
      src => $HTMLURIAttrChecker,
    }, \%iframe_attr_status),
  };
}
2535    
## HTML |embed| element (empty content model).
##
## |check_attrs| walks the attributes by hand instead of using
## |$GetHTMLAttrsChecker|, because any attribute in no namespace is
## accepted on |embed|.  For each attribute it
##   1. picks a value checker (|src|, |type|, a global HTML attribute
##      checker, or a no-op for any other local attribute),
##   2. runs the checker, or reports the attribute when none is found, and
##   3. reports the attribute's specification status when one is known.
## Finally a missing |src| attribute is reported.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Attribute name => FEATURE_* status.  This table does not depend on
    ## the attribute being examined, so it is built once per element
    ## rather than once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      height => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
      width => FEATURE_HTML5_DEFAULT,
    };

    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'src') {
          $checker = $HTMLURIAttrChecker;
          $has_src = 1;
        } elsif ($attr_ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } else {
          ## TODO: height
          ## TODO: width
          $checker = $HTMLAttrChecker->{$attr_ln}
              || sub { }; ## NOTE: Any local attribute is ok.
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      if ($checker) {
        $checker->($self, $attr);
      } elsif ($attr_ns eq '') {
        ## NOTE(review): Because of the |sub { }| fallback above, this
        ## branch looks unreachable as long as the |src| and |type|
        ## checkers are true values; kept for parity with other elements.
        $self->{onerror}->(node => $attr, level => $self->{must_level},
                           type => 'attribute not defined');
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for global attributes in the spec
      }

      if ($attr_ns eq '') {
        my $status = $attr_status->{$attr_ln};
        $self->_attr_status_info ($attr, $status) if $status;
      }
    }

    unless ($has_src) {
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:src');
    }
  },
};
2591    
2592 wakaba 1.49 ## TODO:
2593     ## {applet} FEATURE_M12N10_REC_DEPRECATED
2594     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
2595    
## HTML |object| element.
##
## Content model: param*, then transparent (Prose) content.
##
## State kept in |$element_state|:
##   has_non_param   - true once anything other than a |param| child has
##                     been seen; a later |param| is then an error.
##   has_significant - read in |check_end|; when true it is propagated to
##                     |$item->{real_parent_state}|.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      data => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ## TODO: width
      ## TODO: height
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      data => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_M12N10_REC,
      height => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      usemap => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    })->($self, $item);

    ## At least one of |data| and |type| has to be specified.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type');
      }
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## A forbidden element still counts as non-|param| content, so a
      ## |param| that follows it is reported like one following any other
      ## child.  (This used to set |has_non_legend|, a flag that belongs
      ## to |figure| and is never read for |object|.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| elements are only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
2681 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
2682     ## What about |<section><object data><style scoped></style>x</object></section>|?
2683     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
2684 wakaba 1.1
## HTML |param| element (empty content model).  Both |name| and |value|
## are required; their values are accepted as-is by no-op checkers.
$Element->{$HTML_NS}{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checker = {
      name => sub { },
      value => sub { },
    };
    my $attr_status = {
      %HTMLAttrStatus,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      name => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC,
      value => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valuetype => FEATURE_M12N10_REC,
    };
    $GetHTMLAttrsChecker->($attr_checker, $attr_status)->($self, $item);

    my $node = $item->{node};
    unless ($node->has_attribute_ns (undef, 'name')) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:name');
    }
    unless ($node->has_attribute_ns (undef, 'value')) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:value');
    }
  },
};
2711    
## HTML |video| element.
##
## State kept in |$element_state|:
##   allow_source - true while |source| children are still permitted:
##                  set when there is no |src| attribute, and deleted as
##                  soon as other content is seen.
##   has_source   - 1 once a |source| child has been seen; -1 when there
##                  is no |src| attribute and no |source| has been seen.
{
  my %video_attr_checker = (
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    poster => $HTMLURIAttrChecker, ## TODO: not for audio!
    ## TODO: width, height
  );
  my %video_attr_status = (
    %HTMLAttrStatus,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    end => FEATURE_HTML5_LC,
    height => FEATURE_HTML5_LC,
    loopend => FEATURE_HTML5_LC,
    loopstart => FEATURE_HTML5_LC,
    playcount => FEATURE_HTML5_LC,
    poster => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    start => FEATURE_HTML5_LC,
    width => FEATURE_HTML5_LC,
  );

  $Element->{$HTML_NS}{video} = {
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_LC,
    check_attrs => $GetHTMLAttrsChecker->(\%video_attr_checker,
                                          \%video_attr_status),
    check_start => sub {
      my ($self, $item, $element_state) = @_;
      $element_state->{allow_source}
          = not $item->{node}->has_attribute_ns (undef, 'src');
      ## Only fills in -1 (or a false value) when nothing true is there
      ## yet.
      ## NOTE: It might be set true by |check_element|.
      $element_state->{has_source} ||= $element_state->{allow_source} * -1;
    },
    check_child_element => sub {
      my ($self, $item, $child_el, $child_nsuri, $child_ln,
          $child_is_transparent, $element_state) = @_;
      if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:minus',
                           level => $self->{must_level});
        delete $element_state->{allow_source};
      } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
        ## Explicitly allowed here; nothing to record.
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
        ## A |source| is an error once it is no longer allowed, but it is
        ## recorded as seen either way.
        unless ($element_state->{allow_source}) {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed:prose',
                             level => $self->{must_level});
        }
        $element_state->{has_source} = 1;
      } else {
        delete $element_state->{allow_source};
        $HTMLProseContentChecker{check_child_element}->(@_);
      }
    },
    check_child_text => sub {
      my ($self, $item, $child_node, $has_significant, $element_state) = @_;
      delete $element_state->{allow_source} if $has_significant;
      $HTMLProseContentChecker{check_child_text}->(@_);
    },
    check_end => sub {
      my ($self, $item, $element_state) = @_;
      if ($element_state->{has_source} == -1) {
        $self->{onerror}->(node => $item->{node},
                           type => 'element missing:source',
                           level => $self->{must_level});
      }

      ## The remaining end-of-element checks are shared with |object|.
      $Element->{$HTML_NS}->{object}->{check_end}->(@_);
    },
  };
}
2785    
## HTML |audio| element.  Starts from a copy of every |video| hook, then
## overrides |status| and |check_attrs|; the attribute tables are the
## |video| ones without |poster|, |height| and |width|.
{
  my %audio_attr_checker = (
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
  );
  my %audio_attr_status = (
    %HTMLAttrStatus,
    autoplay => FEATURE_HTML5_LC,
    controls => FEATURE_HTML5_LC,
    end => FEATURE_HTML5_LC,
    loopend => FEATURE_HTML5_LC,
    loopstart => FEATURE_HTML5_LC,
    playcount => FEATURE_HTML5_LC,
    src => FEATURE_HTML5_LC,
    start => FEATURE_HTML5_LC,
  );

  $Element->{$HTML_NS}{audio} = {
    %{$Element->{$HTML_NS}->{video}},
    status => FEATURE_HTML5_LC,
    check_attrs => $GetHTMLAttrsChecker->(\%audio_attr_checker,
                                          \%audio_attr_status),
  };
}
2808    
## HTML |source| element (empty content model).  The |src| attribute is
## required; |src|, |type| and |media| have dedicated value checkers.
$Element->{$HTML_NS}{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attr_checker = {
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      media => $HTMLMQAttrChecker,
    };
    my $attr_status = {
      %HTMLAttrStatus,
      media => FEATURE_HTML5_DEFAULT,
      src => FEATURE_HTML5_DEFAULT,
      type => FEATURE_HTML5_DEFAULT,
    };
    $GetHTMLAttrsChecker->($attr_checker, $attr_status)
        ->($self, $item, $element_state);

    my $node = $item->{node};
    unless ($node->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:src');
    }
  },
};
2830    
## HTML |canvas| element.  Content hooks come from
## |%HTMLTransparentChecker|.  |height| and |width| are checked as
## non-negative integers; the callback passed to the factory always
## returns true.
{
  my %canvas_attr_checker = (
    height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
    width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
  );
  my %canvas_attr_status = (
    %HTMLAttrStatus,
    height => FEATURE_HTML5_LC,
    width => FEATURE_HTML5_LC,
  );

  $Element->{$HTML_NS}{canvas} = {
    %HTMLTransparentChecker,
    status => FEATURE_HTML5_LC,
    check_attrs => $GetHTMLAttrsChecker->(\%canvas_attr_checker,
                                          \%canvas_attr_status),
  };
}
2843    
## HTML |map| element.  The |id| attribute is required and, besides the
## usual ID checks, registers the map in |$self->{map}| under its ID.
$Element->{$HTML_NS}{map} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_id; ## Set by the |id| checker below when it runs.

    ## NOTE: Same as global |id=""|, with |$self->{map}| registration.
    my $check_id = sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      if (length $value > 0) {
        if ($self->{id}->{$value}) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @{$self->{id}->{$value}}, $attr;
        } else {
          $self->{id}->{$value} = [$attr];
        }
      } else {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      }
      if ($value =~ /[\x09-\x0D\x20]/) {
        $self->{onerror}->(node => $attr, type => 'space in ID');
      }
      ## The first |map| seen with a given ID wins.
      $self->{map}->{$value} ||= $attr;
      $has_id = 1;
    };

    my $attr_status = {
      %HTMLAttrStatus,
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      onclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ondblclick => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousedown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseover => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmousemove => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onmouseout => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeypress => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeydown => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      onkeyup => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    };

    $GetHTMLAttrsChecker->({id => $check_id}, $attr_status)
        ->($self, $item, $element_state);
    $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id')
        unless $has_id;
  },
};
2895    
## HTML |area| element (empty content model).
##
## |check_attrs| works in three stages:
##   1. Every attribute is checked by hand.  |area|-specific attributes
##      in no namespace are remembered in |%attr| by local name.
##   2. Hyperlink rules: with |href|, |alt| is required; without |href|,
##      the hyperlink-related attributes and |alt| are not allowed.
##   3. |coords| is validated against the (normalized) |shape|.
$Element->{$HTML_NS}{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my %attr;   ## local name => attribute node (|area|-specific only)
    my $coords; ## parsed |coords| values; undef if absent or malformed

    ## Stage 1: per-attribute checks.
    for my $attr_node (@{$item->{node}->attributes}) {
      my $attr_ns = $attr_node->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr_node->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        my $area_checker = {
          alt => sub { },
          ## NOTE: |alt| value has no conformance criteria.
          shape => $GetHTMLEnumeratedAttrChecker->({
            circ => -1, circle => 1,
            default => 1,
            poly => 1, polygon => -1,
            rect => 1, rectangle => -1,
          }),
          coords => sub {
            my ($self, $attr) = @_;
            my $value = $attr->value;
            if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
              $coords = [split /,/, $value];
            } else {
              $self->{onerror}->(node => $attr,
                                 type => 'coords:syntax error');
            }
          },
          target => $HTMLTargetAttrChecker,
          href => $HTMLURIAttrChecker,
          ping => $HTMLSpaceURIsAttrChecker,
          rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
          media => $HTMLMQAttrChecker,
          hreflang => $HTMLLanguageTagAttrChecker,
          type => $HTMLIMTAttrChecker,
        };
        $checker = $area_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr_node;
        } else {
          ## Not |area|-specific: try the global HTML attributes.
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      if ($checker) {
        $checker->($self, $attr_node) if ref $checker;
      } elsif ($attr_ns eq '') {
        $self->{onerror}->(node => $attr_node, level => $self->{must_level},
                           type => 'attribute not defined');
      } else {
        $self->{onerror}->(node => $attr_node, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        my $attr_status = {
          %HTMLAttrStatus,
          %HTMLM12NCommonAttrStatus,
          accesskey => FEATURE_M12N10_REC,
          alt => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          coords => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          href => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          hreftype => FEATURE_HTML5_DEFAULT,
          lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
          media => FEATURE_HTML5_DEFAULT,
          nohref => FEATURE_M12N10_REC,
          onblur => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          onfocus => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          ping => FEATURE_HTML5_DEFAULT,
          rel => FEATURE_HTML5_DEFAULT,
          shape => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          tabindex => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          target => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
          type => FEATURE_HTML5_DEFAULT,
        };
        $self->_attr_status_info ($attr_node, $attr_status->{$attr_ln});
      }
    }

    ## Stage 2: hyperlink-related attribute constraints.
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      unless (defined $attr{alt}) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:alt');
      }
    } else {
      for my $name (qw/target ping rel media hreflang type alt/) {
        next unless defined $attr{$name};
        $self->{onerror}->(node => $attr{$name},
                           type => 'attribute not allowed');
      }
    }

    ## Stage 3: |coords| versus |shape|.  A missing or unrecognized
    ## |shape| is treated as a rectangle.
    my $shape = 'rectangle';
    if (defined $attr{shape}) {
      my $canonical_shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      };
      $shape = $canonical_shape->{lc $attr{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    if ($shape eq 'default') {
      ## The default shape takes no coordinates.
      if (defined $attr{coords}) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'attribute not allowed');
      }
    } elsif (not defined $attr{coords}) {
      ## Circle, polygon and rectangle all require |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:coords');
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      ## Exactly three values; the third one must not be negative.
      if (@$coords == 3) {
        if ($coords->[2] < 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:2');
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:3:'.@$coords);
      }
    } elsif ($shape eq 'polygon') {
      ## At least six values, and an even number of them.
      if (@$coords >= 6) {
        unless (@$coords % 2 == 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:number:even:'.@$coords);
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:>=6:'.@$coords);
      }
    } elsif ($shape eq 'rectangle') {
      ## Exactly four values, with value 0 < value 2 and value 1 < value 3.
      if (@$coords == 4) {
        unless ($coords->[0] < $coords->[2]) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:0');
        }
        unless ($coords->[1] < $coords->[3]) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:1');
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:4:'.@$coords);
      }
    }
  },
};
3074     ## TODO: only in map
3075    
## Checker definition for the HTML |table| element.
##
## Child elements are validated by a small state machine kept in
## |$element_state->{phase}|:
##
##   'before caption'   - nothing seen yet
##   'in colgroup'      - after |caption| and/or |colgroup|s
##   'after thead'      - after |thead|
##   'after early tfoot'- after a |tfoot| that precedes the table body
##   'in tbodys'        - one or more |tbody| children seen
##   'in trs'           - one or more |tr| children seen
##   'after tfoot'      - after a trailing |tfoot|; nothing more is allowed
##
## |$element_state->{has_tfoot}| records that a |tfoot| has been seen, so
## that a second one is rejected.
##
## NOTE: A |tfoot| placed before the body used to switch directly to
## 'in tbodys', which wrongly rejected |<table><tfoot/><tr/></table>|.
## The HTML5 content model allows either |tbody|s or |tr|s after such a
## |tfoot|, hence the separate 'after early tfoot' phase.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  ## Initializes the content model state machine.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';
  },
  ## Validates one child element against the current phase and advances
  ## the phase.  Dies if the phase holds an unknown value.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; the phase is left untouched.
    } elsif ($element_state->{phase} eq 'in tbodys') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        ## Stay in 'in tbodys'.
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'in trs') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        ## Stay in 'in trs'.
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after early tfoot') {
      ## A |tfoot| has been seen before the body: the body may still
      ## be made of either |tbody|s or |tr|s, but no further |tfoot|.
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after thead') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after early tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'in colgroup') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
        $element_state->{phase} = 'after thead';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after early tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'before caption') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
        $element_state->{phase} = 'after thead';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after early tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after tfoot') {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    } else {
      die "check_child_element: Bad |table| phase: $element_state->{phase}";
    }
  },
  ## Significant text is never allowed directly in |table|.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  ## Runs the table model analysis, forwarding each problem it reports
  ## as a 'table:'-prefixed error, records the node in
  ## |$self->{return}->{table}| and then runs the generic end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    Whatpm::HTMLTable->form_table ($item->{node}, sub {
      my %opt = @_;
      $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
    });
    push @{$self->{return}->{table}}, $item->{node};

    $HTMLChecker{check_end}->(@_);
  },
};
3199    
## Checker definition for the HTML |caption| element: content checking
## is inherited from |%HTMLPhrasingContentChecker|; only the feature
## status and the per-attribute status table are specific to it.
$Element->{$HTML_NS}->{caption} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},  # no element-specific attribute checkers
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
3210    
## Checker definition for the HTML |colgroup| element.  Only |col|
## children (plus anything in the current "plus" set) are accepted and
## significant text is rejected.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## The extra predicate requires the parsed value to be positive.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    valign => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  ## Accepts HTML |col| children only.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed')
          unless $child_nsuri eq $HTML_NS && $child_ln eq 'col';
    }
  },
  ## Significant text is an error.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
};
3253    
## Checker definition for the HTML |col| element: content checking is
## inherited from |%HTMLEmptyChecker|; |span| has its own value checker.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## The extra predicate requires the parsed value to be positive.
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      span => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
};
3271    
## Checker definition for the HTML |tbody| element (also copied for
## |thead| and |tfoot|).  Only |tr| children are accepted, significant
## text is rejected, and at least one |tr| child is required.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    valign => FEATURE_M12N10_REC,
  }),
  ## Accepts HTML |tr| children and remembers that one was seen.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed.
    } else {
      if ($child_nsuri eq $HTML_NS && $child_ln eq 'tr') {
        $element_state->{has_tr} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  ## Significant text is an error.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  ## Reports a missing |tr| child, then runs the generic end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:tr')
        unless $element_state->{has_tr};

    $HTMLChecker{check_end}->(@_);
  },
};
3315    
## |thead| and |tfoot| each get their own shallow copy of the |tbody|
## checker definition.
$Element->{$HTML_NS}->{$_} = { %{ $Element->{$HTML_NS}->{tbody} } }
    for qw(thead tfoot);
3323    
## Checker definition for the HTML |tr| element.  Only |td| and |th|
## children are accepted, significant text is rejected, and at least
## one cell is required.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    valign => FEATURE_M12N10_REC,
  }),
  ## Accepts HTML |td| / |th| children and remembers that a cell was seen.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed.
    } else {
      if ($child_nsuri eq $HTML_NS &&
          ($child_ln eq 'td' || $child_ln eq 'th')) {
        $element_state->{has_cell} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  ## Significant text is an error.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  ## Reports a row without any cell, then runs the generic end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:td|th')
        unless $element_state->{has_cell};

    $HTMLChecker{check_end}->(@_);
  },
};
3369    
## Checker definition for the HTML |td| element: content checking is
## inherited from |%HTMLProseContentChecker|; |colspan| and |rowspan|
## have their own value checkers.
$Element->{$HTML_NS}->{td} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## The extra predicate requires the parsed value to be positive.
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      abbr => FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC,
      axis => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      headers => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      nowrap => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scope => FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
3396    
## Checker definition for the HTML |th| element: content checking is
## inherited from |%HTMLPhrasingContentChecker|; |colspan|, |rowspan|
## and |scope| have their own value checkers.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## The extra predicate requires the parsed value to be positive.
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->({
        row => 1, col => 1, rowgroup => 1, colgroup => 1,
      }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      abbr => FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC,
      axis => FEATURE_M12N10_REC,
      bgcolor => FEATURE_M12N10_REC_DEPRECATED,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      colspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      headers => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      nowrap => FEATURE_M12N10_REC_DEPRECATED,
      rowspan => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      scope => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
3425    
3426     ## TODO: forms
3427 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
3428 wakaba 1.1
3429 wakaba 1.49 =pod
3430    
3431     form Common, accept, accept-charset action method enctype target onreset onsubmit name(deprecated) xhtml10.lang
3432     input Common accept accesskey alt checked disabled maxlength name readonly size src tabindex type value usemap ismap onblur onchange onfocus onselect align(deprecated) lang(xhtml10) %reserved
3433     select Common disabled multiple name size tabindex onblur onchange onfocus lang(xhtml10) %reserved
3434     option Common disabled label selected value lang(x10)
3435     textarea Common accesskey cols disabled name readonly rows tabindex onblur onchange onfocus onselect lang(x10) %reserved
3436     button Common accesskey disabled name tabindex type value onblur onfocus lang(x10) %reserved
3437     fieldset Common lang(x10)
3438     label Common accesskey for onblur onfocus lang(xhtml10)
3439     optgroup Common disabled label lang(x10)
3440    
3441     %reserved (html4)
3442     datafld => FEATURE_HTML4_REC_RESERVED,
3443     dataformatas => FEATURE_HTML4_REC_RESERVED,
3444     datasrc => FEATURE_HTML4_REC_RESERVED,
3445    
3446     =cut
3447    
## Checker definition for the HTML |script| element.
##
## With a |src| attribute the element must be empty; otherwise the
## effective script type is derived from |type| / |language| and the
## content is reported as unsupported at the end of the element.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    async => FEATURE_HTML5_DEFAULT,
    charset => FEATURE_M12N10_REC,
    defer => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    event => FEATURE_HTML4_REC_RESERVED,
    for => FEATURE_HTML4_REC_RESERVED,
    id => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    language => FEATURE_M12N10_REC_DEPRECATED,
    src => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
    type => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  }),
  ## Sets either |must_be_empty| (external script) or |script_type|
  ## (inline script) in the element state.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    if ($node->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
    } else {
      ## NOTE: No content model conformance in HTML5 spec.
      my $type = $node->get_attribute_ns (undef, 'type');
      my $language = $node->get_attribute_ns (undef, 'language');

      ## An empty |type| or |language| value selects the default type;
      ## otherwise |type| wins over |language|.
      my $has_empty = grep { defined $_ and $_ eq '' } $type, $language;
      my $script_type
          = $has_empty        ? 'text/javascript'
          : defined $type     ? $type
          : defined $language ? 'text/' . $language
          :                     'text/javascript';
      $element_state->{script_type} = $script_type; ## TODO: $type normalization
    }
  },
  ## Child elements are only an error when the element must be empty.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed.
    } elsif ($element_state->{must_be_empty}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    }
  },
  ## Significant text is only an error when the element must be empty.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant and $element_state->{must_be_empty};
  },
  ## For inline scripts, reports the script type as unsupported and runs
  ## the generic end check.  Nothing is done for external scripts.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{must_be_empty}) {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'script:'.$element_state->{script_type});
      ## TODO: text/javascript support

      $HTMLChecker{check_end}->(@_);
    }
  },
};
3526 wakaba 1.25 ## ISSUE: Significant check and text child node
3527 wakaba 1.1
3528     ## NOTE: When script is disabled.
## Checker definition for the HTML |noscript| element.
##
## Inside |head| (|$self->{flag}->{in_head}|) only |link|, unscoped
## |style| and |meta| with an |http-equiv| other than |content-type|
## are accepted and significant text is rejected.  Elsewhere the
## element is checked by |%HTMLTransparentChecker| with nested
## |noscript| forbidden.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
  }),
  ## Reports |noscript| in a non-HTML document and, outside |head|,
  ## adds |noscript| itself to the minus set.
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    if (not $item->{node}->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript');
    }

    if (not $self->{flag}->{in_head}) {
      $self->_add_minus_elements ($element_state,
                                  {$HTML_NS => {noscript => 1}});
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if (not $self->{flag}->{in_head}) {
      $HTMLTransparentChecker{check_child_element}->(@_);
    } elsif ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed.
    } else {
      ## Decide first, then report once: every rejected child gets the
      ## same error.
      my $allowed = 0;
      if ($child_nsuri eq $HTML_NS) {
        if ($child_ln eq 'link') {
          $allowed = 1;
        } elsif ($child_ln eq 'style') {
          $allowed = 1 unless $child_el->has_attribute_ns (undef, 'scoped');
        } elsif ($child_ln eq 'meta') {
          my $http_equiv_attr
              = $child_el->get_attribute_node_ns (undef, 'http-equiv');
          ## TODO: case
          $allowed = 1
              if $http_equiv_attr and
                 lc ($http_equiv_attr->value) ne 'content-type';
        }
      }
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:head noscript',
                         level => $self->{must_level})
          unless $allowed;
    }
  },
  ## Inside |head| significant text is an error; elsewhere the
  ## transparent checker decides.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if (not $self->{flag}->{in_head}) {
      $HTMLTransparentChecker{check_child_text}->(@_);
    } elsif ($has_significant) {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed');
    }
  },
  ## Removes this element's minus-set additions and runs the end check
  ## matching the context.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    my $end_checker = $self->{flag}->{in_head}
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
};
3614 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
3615 wakaba 1.1
## Checker definition for the HTML |event-source| element: content
## checking is inherited from |%HTMLEmptyChecker|; |src| is checked as
## a URI.
$Element->{$HTML_NS}->{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      src => $HTMLURIAttrChecker,
    },
    {
      %HTMLAttrStatus,
      src => FEATURE_HTML5_LC,
    },
  ),
};
3626    
## Checker definition for the HTML |details| element.
##
## A |legend| child is required and must come before any other
## non-transparent child element or significant text; other children
## are checked by |%HTMLProseContentChecker|.
$Element->{$HTML_NS}->{details} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    open => $GetHTMLBooleanAttrChecker->('open'),
  }, {
    %HTMLAttrStatus,
    open => FEATURE_HTML5_WD,
  }),
  ## NOTE: legend, Prose
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed; does not count as content.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      ## A |legend| after other content is misplaced.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:details legend',
                         level => $self->{must_level})
          if $element_state->{has_non_legend};
      ## The |legend| itself also counts as content for later |legend|s.
      @{$element_state}{qw(has_legend has_non_legend)} = (1, 1);
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      unless ($child_is_transparent) {
        $element_state->{has_non_legend} = 1;
      }
      ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
      ## is conforming?
    }
  },
  ## Significant text counts as non-|legend| content.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  ## Reports a missing |legend|, then runs the prose content end check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'element missing:legend',
                       level => $self->{must_level})
        unless $element_state->{has_legend};

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<details><legend>aa</legend></details>| error?
  },
};
3681    
## Checker definition for the HTML |datagrid| element.
##
## The content model is tracked in |$element_state->{phase}|:
##
##   'any'   - nothing decisive seen yet
##   'prose' - prose content; a |table| must not be its first element
##   'none'  - a single |table|, |select| or |datalist| was seen;
##             nothing more is allowed
##
## |a| and nested |datagrid| are put in the minus set for the duration
## of the element.
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }, {
    %HTMLAttrStatus,
    disabled => FEATURE_HTML5_WD,
    multiple => FEATURE_HTML5_WD,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {a => 1, datagrid => 1}});
    $element_state->{phase} = 'any';
  },
  ## Prose -(text* table Prose*) | table | select | datalist | Empty
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed.
    } elsif ($element_state->{phase} eq 'prose') {
      ## Rejected: non-prose elements, and a |table| that would be the
      ## first element of the prose content.
      if (not $HTMLProseContent->{$child_nsuri}->{$child_ln} or
          (not $element_state->{has_element} and
           $child_nsuri eq $HTML_NS and
           $child_ln eq 'table')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      }
      $element_state->{has_element} = 1;
    } elsif ($element_state->{phase} eq 'any') {
      if ($child_nsuri eq $HTML_NS and
          $child_ln =~ /\A(?:table|select|datalist)\z/) {
        $element_state->{phase} = 'none';
      } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{has_element} = 1;
        $element_state->{phase} = 'prose';
        ## TODO: transparent?
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'none') {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    } else {
      die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
    }
  },
  ## Significant text switches 'any' to 'prose' and is an error in any
  ## phase other than those two.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      my $phase = $element_state->{phase};
      if ($phase eq 'any') {
        $element_state->{phase} = 'prose';
      } elsif ($phase ne 'prose') {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    }
  },
  ## Removes this element's minus-set additions and runs the end check
  ## matching the final phase.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $end_checker = $element_state->{phase} eq 'none'
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $end_checker->(@_);
  },
  ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
  ## are not disallowed (assuming that form control contents are also
  ## prose content).
};
3771    
## Checker definition for the HTML |command| element: content checking
## is inherited from |%HTMLEmptyChecker|.
$Element->{$HTML_NS}->{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    checked => $GetHTMLBooleanAttrChecker->('checked'),
    default => $GetHTMLBooleanAttrChecker->('default'),
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    hidden => $GetHTMLBooleanAttrChecker->('hidden'),
    icon => $HTMLURIAttrChecker,
    label => sub { }, ## NOTE: No conformance criteria
    radiogroup => sub { }, ## NOTE: No conformance criteria
    ## Only the exact values |command|, |checkbox| and |radio| are
    ## accepted.
    type => sub {
      my ($self, $attr) = @_;
      my $value = $attr->value;
      $self->{onerror}->(node => $attr, type => 'attribute value not allowed')
          unless $value =~ /\A(?:command|checkbox|radio)\z/;
    },
  }, {
    %HTMLAttrStatus,
    checked => FEATURE_HTML5_WD,
    default => FEATURE_HTML5_WD,
    disabled => FEATURE_HTML5_WD,
    hidden => FEATURE_HTML5_WD,
    icon => FEATURE_HTML5_WD,
    label => FEATURE_HTML5_WD,
    radiogroup => FEATURE_HTML5_WD,
    type => FEATURE_HTML5_WD,
  }),
};
3802    
## Checker definition for the HTML |menu| element.
##
## The content is either a list of |li| elements or phrasing content,
## never a mix.  |$element_state->{phase}| starts as 'li or phrasing'
## and is fixed to 'li' or 'phrasing' by the first decisive child.
## |$self->{flag}->{in_menu}| is set while inside the element.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
    id => sub {
      ## NOTE: same as global |id=""|, with |$self->{menu}| registration
      my ($self, $attr) = @_;
      my $value = $attr->value;
      if (length $value) {
        my $same_id_attrs = $self->{id}->{$value};
        if ($same_id_attrs) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @$same_id_attrs, $attr;
        } else {
          $self->{id}->{$value} = [$attr];
        }
      } else {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      }
      $self->{onerror}->(node => $attr, type => 'space in ID')
          if $value =~ /[\x09-\x0D\x20]/;
      ## The first |menu| registered under a given ID is kept.
      $self->{menu}->{$value} ||= $attr;
      ## ISSUE: <menu id=""><p contextmenu=""> match?
    },
    label => sub { }, ## NOTE: No conformance criteria
    type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    autosubmit => FEATURE_HTML5_WD,
    compat => FEATURE_M12N10_REC_DEPRECATED,
    label => FEATURE_HTML5_WD,
    lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    type => FEATURE_HTML5_WD,
  }),
  ## Resets the phase and sets the |in_menu| flag, remembering its
  ## previous value so that |check_end| can restore it.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'li or phrasing';
    $element_state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed.
    } else {
      ## Classify the child as the phase it belongs to, if any.
      my $child_kind
          = ($child_nsuri eq $HTML_NS and $child_ln eq 'li') ? 'li'
          : $HTMLPhrasingContent->{$child_nsuri}->{$child_ln} ? 'phrasing'
          :                                                     undef;
      if (not defined $child_kind) {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      } elsif ($element_state->{phase} eq $child_kind) {
        ## Consistent with the content seen so far.
      } elsif ($element_state->{phase} eq 'li or phrasing') {
        $element_state->{phase} = $child_kind;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  ## Significant text counts as phrasing content.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      my $phase = $element_state->{phase};
      if ($phase eq 'li or phrasing') {
        $element_state->{phase} = 'phrasing';
      } elsif ($phase ne 'phrasing') {
        $self->{onerror}->(node => $child_node,
                           type => 'character not allowed');
      }
    }
  },
  ## Restores the |in_menu| flag and runs the end check matching the
  ## final phase.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{in_menu_original}) {
      delete $self->{flag}->{in_menu};
    }

    my $end_checker = $element_state->{phase} eq 'li'
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end}; # 'phrasing' or 'li or phrasing'
    $end_checker->(@_);
  },
};
3899    
## Checker definition for the |datatemplate| element (HTML5, at risk).
## Built on the entries of %HTMLChecker; the keys listed after the spread
## take precedence over any same-named entry from it.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  is_xml_root => 1, ## NOTE(review): presumably marks the element as a
                    ## permitted XML document root -- confirm with caller.
  ## Text child: significant character data is reported as an error.
  check_child_text => sub {
    my ($self, $item, $text_node, $is_significant, $element_state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $is_significant;
  },
  ## Element child: only |rule| in the HTML namespace is accepted, apart
  ## from whatever the current plus/minus element tables say.
  check_child_element => sub {
    my ($self, $item, $kid, $kid_ns, $kid_name,
        $kid_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$kid_ns}->{$kid_name}) {
      ## Listed in the minus table: reported at the "must" level.
      $self->{onerror}->(node => $kid,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$kid_ns}->{$kid_name}) {
      ## Listed in the plus table: nothing to report.
    } elsif ($kid_ns eq $HTML_NS and $kid_name eq 'rule') {
      ## |rule| child: nothing to report.
    } else {
      $self->{onerror}->(node => $kid,
                         type => 'element not allowed:datatemplate');
    }
  },
};
3927    
## Checker definition for the |rule| element (HTML5, at risk).
## Built on the entries of %HTMLChecker; the keys listed after the spread
## take precedence over any same-named entry from it.
$Element->{$HTML_NS}->{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  ## First hash: per-attribute value checkers.  Second hash: per-attribute
  ## feature status flags, layered on top of %HTMLAttrStatus.
  check_attrs => $GetHTMLAttrsChecker->({
    mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
    condition => $HTMLSelectorsAttrChecker,
  }, {
    %HTMLAttrStatus,
    mode => FEATURE_HTML5_AT_RISK,
    condition => FEATURE_HTML5_AT_RISK,
  }),
  ## Child text and child elements are accepted without any check.
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
  check_child_text => sub { },
  check_child_element => sub { },
  ## On entry, register HTML |nest| as a plus element for this element's
  ## state; the registration is undone in |check_end|.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $extra_allowed = {$HTML_NS => {nest => 1}};
    $self->_add_plus_elements ($element_state, $extra_allowed);
  },
  ## On exit, drop the plus-element registration, then run the
  ## %HTMLChecker end check with the same arguments.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_plus_elements ($element_state);
    $HTMLChecker{check_end}->(@_);
  },
};
3954    
## Checker definition for the |nest| element (HTML5, at risk).
## Built on the entries of %HTMLEmptyChecker.
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  ## First hash: per-attribute value checkers.  Second hash: per-attribute
  ## feature status flags, layered on top of %HTMLAttrStatus.
  check_attrs => $GetHTMLAttrsChecker->({
    filter => $HTMLSelectorsAttrChecker,
    ## |mode| must be one or more characters, none of which is
    ## U+0009..U+000D or U+0020; anything else (including the empty
    ## string) is a syntax error.
    mode => sub {
      my ($self, $attr) = @_;
      unless ($attr->value =~ /\A[^\x09-\x0D\x20]+\z/) {
        $self->{onerror}->(node => $attr, type => 'mode:syntax error');
      }
    },
  }, {
    %HTMLAttrStatus,
    filter => FEATURE_HTML5_AT_RISK,
    mode => FEATURE_HTML5_AT_RISK,
  }),
};
3973    
## Checker definition for the |legend| element.
## Content checks come from %HTMLPhrasingContentChecker; no
## element-specific attribute value checkers are registered (empty first
## hash), only attribute status flags.
$Element->{$HTML_NS}->{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      ## Shared status tables first; the entries below take precedence.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
    },
  ),
};
3985    
## Checker definition for the |div| element.
## Content checks come from %HTMLProseContentChecker; no element-specific
## attribute value checkers are registered (empty first hash), only
## attribute status flags.
$Element->{$HTML_NS}->{div} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      ## Shared status tables first; the entries below take precedence.
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      align => FEATURE_M12N10_REC_DEPRECATED,
      ## Attributes carrying the HTML4 "reserved" status.
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
    },
  ),
};
3999    
## Checker definition for the |font| element.
## Content checks come from %HTMLTransparentChecker.  Attribute value
## checkers are still to be written (empty first hash); only attribute
## status flags are provided.
$Element->{$HTML_NS}->{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_AT_RISK | FEATURE_M12N10_REC_DEPRECATED,
  check_attrs => $GetHTMLAttrsChecker->(
    { ## TODO
    },
    {
      ## Shared status table first; the entries below take precedence.
      %HTMLAttrStatus,
      ## Attributes with HTML5 default + M12N10 REC status.
      class => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      dir => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      id => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      title => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
      ## Attributes whose second status comes from XHTML 1.0.
      lang => FEATURE_HTML5_DEFAULT | FEATURE_XHTML10_REC,
      style => FEATURE_HTML5_AT_RISK | FEATURE_XHTML10_REC,
      ## Attributes with deprecated M12N10 REC status.
      color => FEATURE_M12N10_REC_DEPRECATED,
      face => FEATURE_M12N10_REC_DEPRECATED,
      size => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
};
4017 wakaba 1.49
4018     ## TODO: frameset FEATURE_M12N10_REC
4019     ## class title id cols rows onload onunload style(x10)
4020     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
4021     ## noframes Common, lang(xhtml10)
4022    
4023     ## TODO: deprecated:
4024     ## basefont color face id size
4025     ## center Common lang(xhtml10)
4026     ## dir Common compat lang(xhtml10)
4027     ## isindex class dir id title prompt style(x10) lang(x10)
4028     ## s,strike,u Common xhtml10.lang
4029    
4030     ## TODO: CR: ruby rb rt rp rbc rtc @rbspan
4031 wakaba 1.1
## Record in the shared namespace table that the definitions for the
## HTML namespace have been loaded.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

## A module file must end with a true value.
1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24