/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.49 - (hide annotations) (download)
Sun Feb 24 07:51:19 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.48: +705 -75 lines
++ whatpm/t/ChangeLog	24 Feb 2008 07:51:13 -0000
	* content-model-1.dat, content-model-2.dat: Some test
	results are updated.  New test data on non-standard
	attributes are added.  Note that some test results do
	still need to be updated.

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	24 Feb 2008 07:47:32 -0000
	* ContentChecker.pm (_attr_status_info): New internal method.

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	24 Feb 2008 07:50:04 -0000
	* HTML.pm: Standardized status for attributes are added (HTML5
	info are not added yet).  Make element or attribute not defined
	by HTML5 spec as a conformance error (though it is not clearly
	defined by HTML5 spec as non-conforming).  Note that more work
	is needed for non-standard attribute thing, since the current
	implementation prevents non-standard attribute from being
	syntactically checked (otherwise it is treated as if a
	standard attribute).

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
## Feature status constants.
##
## Each constant maps a specification (or a maturity level within a
## specification) onto one of the generic |FEATURE_STATUS_*| values
## of |Whatpm::ContentChecker|.

## -- HTML5
sub FEATURE_HTML5_LC () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}
sub FEATURE_HTML5_AT_RISK () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
sub FEATURE_HTML5_WD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
sub FEATURE_HTML5_FD () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
sub FEATURE_HTML5_DEFAULT () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
## NOTE: Was part of HTML5, but was dropped.
sub FEATURE_HTML5_DROPPED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}

## -- Web Forms 2.0
sub FEATURE_WF2 () {
  Whatpm::ContentChecker::FEATURE_STATUS_LC()
}

## -- XHTML Modularization 1.0
## NOTE: XHTML m12n 1.0 did pass the CR phase and is a REC.
sub FEATURE_M12N10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}
sub FEATURE_M12N10_REC_DEPRECATED () {
  Whatpm::ContentChecker::FEATURE_STATUS_REC()
}

## -- XHTML 1.0
## NOTE(review): Named |_REC| but mapped to the CR status; confirm that
## this is intended.
sub FEATURE_XHTML10_REC () {
  Whatpm::ContentChecker::FEATURE_STATUS_CR()
}

## -- HTML 4 (reserved attributes)
sub FEATURE_HTML4_REC_RESERVED () {
  Whatpm::ContentChecker::FEATURE_STATUS_WD()
}
24    
25     ## NOTE: M12N10 status is based on its abstract module definition.
26     ## It contains a number of problems. (However, again, it's a REC!)
27    
28     ## NOTE: XHTML10 status is based on its transitional and frameset DTDs
29     ## (second edition). Only missing attributes from M12N10 abstract
30     ## definition are added.
31     ## NOTE: HTML4 status is based on its transitional and frameset DTDs (HTML
32     ## 4.01). Only missing attributes from XHTML10 are added.
33 wakaba 1.48
34 wakaba 1.29 ## December 2007 HTML5 Classification
35    
## Metadata content: namespace URI -> {local name => 1}.
##
## NOTE: A |meta| with no |name| element is not allowed as a metadata
## content other than |head| element.
## NOTE: RDF is mentioned in the HTML5 spec.
## TODO: Other RDF elements?
my $HTMLMetadataContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw(
      title base link style script noscript
      event-source command datatemplate
      meta
    ),
  },
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
48    
## Prose content: namespace URI -> {local name => 1}.
##
## ISSUE: |details| and |datagrid| are "Prose element" in the spec;
## |datagrid| is also an "Interactive element" in the spec.
## ISSUE: |div| has no category in the spec.
## NOTE: |style| is only allowed if |scoped| attribute is specified.
## Additionally, it must be before any other element or
## non-inter-element-whitespace text node.
## NOTE: |area| is allowed only as a descendant of |map|.
## NOTE: |menu|: If there is a |menu| ancestor, phrasing.  Otherwise,
## prose.
my $HTMLProseContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      ## Prose-only elements
      qw(
        section nav article blockquote aside
        h1 h2 h3 h4 h5 h6 header
        footer address p hr dialog pre
        ol ul dl figure map table
        details datagrid datatemplate div style
      ),
      ## Elements that are also phrasing content
      qw(
        br q cite em strong small mark
        dfn abbr time progress meter code
        var samp kbd sub sup span i
        b bdo script noscript event-source
        command font a area
        ins del
        menu
      ),
      ## Embedded content
      qw(
        img iframe embed object video audio canvas
      ),
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
87    
## Sectioning content: namespace URI -> {local name => 1}.
##
## NOTE: |body| is only allowed in |html| element.
my $HTMLSectioningContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw(section nav article blockquote aside body),
  },
};
95    
## Heading content: namespace URI -> {local name => 1}.
my $HTMLHeadingContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw(h1 h2 h3 h4 h5 h6 header),
  },
};
101    
## Phrasing content: namespace URI -> {local name => 1}.
##
## NOTE: All phrasing content is also prose content.
## NOTE: And non-inter-element-whitespace text nodes.
## ISSUE: |datagrid| is an "Interactive element" in the spec.
## NOTE: |area| is allowed only as a descendant of |map|.
## NOTE: |ins| and |del| are transparent.
## NOTE: |menu|: If there is a |menu| ancestor, phrasing.  Otherwise,
## prose.
my $HTMLPhrasingContent = {
  $HTML_NS => {
    map { ($_ => 1) } (
      qw(
        br q cite em strong small mark
        dfn abbr time progress meter code
        var samp kbd sub sup span i
        b bdo script noscript event-source
        command font a datagrid area
        ins del
        menu
      ),
      ## Embedded content
      qw(
        img iframe embed object video audio canvas
      ),
    ),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
131    
132 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
133 wakaba 1.29
## Interactive content: namespace URI -> {local name => 1}.
##
## ISSUE: |datagrid| is categorized as "Interactive element".
my $HTMLInteractiveContent = {
  $HTML_NS => {
    map { ($_ => 1) } qw(a datagrid),
  },
};
140    
141 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
142     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
143    
144     ## -- Common attribute syntacx checkers
145    
146 wakaba 1.1 our $AttrChecker;
147    
## Returns a checker for an enumerated attribute.
##
## The argument is a hash reference mapping each known (lowercase)
## keyword to a positive value if it is conforming, or to a negative
## value if it is known but non-conforming.  The returned checker
## takes |($self, $attr)| and reports through |$self->{onerror}|.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    ## TODO: ASCII case insensitivity?
    my $state = $states->{lc ($attr->value)} || 0;
    return if $state > 0;
    my $type = $state ? 'enumerated:non-conforming' : 'enumerated:invalid';
    $self->{onerror}->(node => $attr, type => $type);
  };
}; # $GetHTMLEnumeratedAttrChecker
162    
## Returns a checker for the boolean attribute |$local_name|.
##
## The only accepted values are the empty string and the attribute
## name itself (compared case-sensitively); anything else is reported
## as |boolean:invalid|.
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    return if $value eq '' or $value eq $local_name;
    $self->{onerror}->(node => $attr, type => 'boolean:invalid');
  };
}; # $GetHTMLBooleanAttrChecker
173    
174 wakaba 1.8 ## Unordered set of space-separated tokens
## Checker for an unordered set of unique space-separated tokens.
##
## Every second or later occurrence of a token is reported as
## |duplicate token:<token>|.
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($self, $attr) = @_;
  my %seen;
  for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
    next unless length $token; # runs of space characters
    next unless $seen{$token}++; # first occurrence
    $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
  }
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
186 wakaba 1.8
187 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
188     ## whose allowed values are defined by the section on link types)
## Checker for the |rel| attribute: an unordered set of space-separated
## tokens whose allowed values are defined by the link type list.
##
## Arguments: |$a_or_area| is the index into each link type's |effect|
## array that selects the effect for the current context; |$todo|
## receives the |has_hyperlink_link_type| flag; |$self| and |$attr|
## are the checker and the attribute node.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr) = @_;

  ## Collect the distinct tokens.  A repeated |up| is not an error.
  my %word;
  for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
    next unless length $token;
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
    }
  }

  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $token (keys %word) {
    my $def = $LinkType->{$token};
    unless (defined $def) {
      $self->{onerror}->(node => $attr, level => 'unsupported',
                         type => 'link type:'.$token);
      next;
    }

    my $effect = $def->{effect}->[$a_or_area];
    my $status = $def->{status};
    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr, level => 's',
                           type => 'link type:proposed:'.$token);
      }
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context:'.$token);
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming:'.$token);
    }

    ## |alternate| is decided after the loop, since its effect depends
    ## on whether |stylesheet| is also specified.
    if (defined $effect and $token ne 'alternate' and
        $effect eq 'hyperlink') {
      $todo->{has_hyperlink_link_type} = 1;
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$token}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate:'.$token);
      } else {
        $self->{has_link_type}->{$token} = 1;
      }
    }
  }

  if ($word{alternate} and not $word{stylesheet}) {
    $todo->{has_hyperlink_link_type} = 1;
  }

  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback
  ## link, which again is impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than
  ## predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".
}; # $HTMLLinkTypesAttrChecker
270 wakaba 1.20
271     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
272 wakaba 1.1
273     ## URI (or IRI)
## Checker for an attribute whose value is a URI (or IRI) reference.
##
## Errors found by |Whatpm::URIChecker| are forwarded to
## |$self->{onerror}| with the type prefixed by |URI::| and, if
## available, suffixed by the error position.  Also sets
## |$self->{has_uri_attr}|.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an
  ## absolute reference with optional fragment identifier.)
  my $report = sub {
    my %opt = @_;
    my $type = 'URI::'.$opt{type};
    $type .= ':'.$opt{position} if defined $opt{position};
    $self->{onerror}->(node => $attr, level => $opt{level}, type => $type);
  };
  Whatpm::URIChecker->check_iri_reference ($attr->value, $report);
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>
}; # $HTMLURIAttrChecker
286    
287     ## A space separated list of one or more URIs (or IRIs)
## Checker for a space separated list of one or more URIs (or IRIs).
##
## Each item is checked by |Whatpm::URIChecker|; reported error types
## have the form |URIs::<type>:<item index>[:<position>]|.  Also sets
## |$self->{has_uri_attr}|.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;
  my @uris = split /[\x09-\x0D\x20]+/, $attr->value;
  for my $index (0..$#uris) {
    Whatpm::URIChecker->check_iri_reference ($uris[$index], sub {
      my %opt = @_;
      my $type = 'URIs::'.$opt{type}.':'.$index;
      $type .= ':'.$opt{position} if defined $opt{position};
      $self->{onerror}->(node => $attr, level => $opt{level}, type => $type);
    });
  }
  ## ISSUE: Relative references?
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a
  ## relative reference, i.e. same as base URI.)
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
308    
## Checker for a date and time attribute value with a time zone.
##
## A value that does not match the overall syntax is reported as
## |datetime:syntax error|.  Otherwise each out-of-range component
## (month, day, hour, minute, second, time zone offset) is reported
## separately.
my $HTMLDatetimeAttrChecker = sub {
  my ($self, $attr) = @_;
  my $report = sub {
    $self->{onerror}->(node => $attr, type => $_[0]);
  };

  ## ISSUE: "space", not "space character" (in parsing algorithm,
  ## "space character")
  unless ($attr->value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
    $report->('datetime:syntax error');
    return;
  }
  ## NOTE: The fraction ($f) is only checked syntactically.
  my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
      = ($1, $2, $3, $4, $5, $6, $7, $8, $9);

  ## ISSUE: The month range is not explicitly specified (though in
  ## parsing algorithm)
  if ($M < 1 or $M > 12) {
    $report->('datetime:bad month');
  } else {
    ## February is given 29 days here; the leap year rule is applied
    ## by the second test.
    my @last_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
    $report->('datetime:bad day') if $d < 1 or $d > $last_day[$M];
    my $is_leap_year = ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
    $report->('datetime:bad day')
        if $M == 2 and $d == 29 and not $is_leap_year;
  }

  $report->('datetime:bad hour') if $h > 23;
  $report->('datetime:bad minute') if $m > 59;
  $report->('datetime:bad second') if defined $s and $s > 59;
  ## The offset is absent when the time zone is |Z|.
  $report->('datetime:bad timezone hour') if defined $zh and $zh > 23;
  $report->('datetime:bad timezone minute') if defined $zm and $zm > 59;
  ## ISSUE: Maybe timezone -00:00 should have same semantics as in
  ## RFC 3339.
}; # $HTMLDatetimeAttrChecker
339    
## Checker for an integer attribute value: one or more ASCII digits,
## optionally preceded by a |-|.
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A-?[0-9]+\z/;
  $self->{onerror}->(node => $attr, type => 'integer:syntax error');
}; # $HTMLIntegerAttrChecker
347    
## Returns a checker for a non-negative integer attribute value.
##
## |$range_check| is a code reference that receives the numeric value
## and returns true if and only if the value is within the allowed
## range.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value =~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr, type => 'nninteger:syntax error');
      return;
    }
    return if $range_check->($value + 0);
    $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
363    
## Returns a checker for a floating point number attribute value.
##
## A value is syntactically accepted if it consists of ASCII digits
## with at most one |.| (before, between or after the digits), all
## optionally preceded by a |-|.  At least one digit is required.
## A value with more than one |.| (e.g. |1.2.3|) is reported as
## |float:syntax error|.
##
## |$range_check| is a code reference that receives the numeric value
## and returns true if and only if the value is within the allowed
## range.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## Either digits with an optional |.| and optional further digits,
    ## or a leading |.| followed by digits.
    if ($value =~ /\A-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)\z/) {
      unless ($range_check->($value + 0)) {
        $self->{onerror}->(node => $attr, type => 'float:out of range');
      }
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error');
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
379    
380     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
381     ## ISSUE: RFC 2046 does not define syntax of media types.
382     ## ISSUE: The definition of "a valid MIME type" is unknown.
383     ## Syntactical correctness?
## Checker for an attribute whose value is an Internet media type,
## optionally with parameters.
##
## The value is parsed into |(type, subtype, name => value, ...)| and
## handed to |Whatpm::IMTChecker->check_imt|, whose errors are reported
## with the |IMT:| prefix.  A value that cannot be parsed is reported
## as |IMT:syntax error|.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens.  Is it allowed in HTML?  Maybe no.
  ## ISSUE: RFC 2231 extension?  Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
    my @type = ($1, $2);
    my $param = $3;
    while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
      ## Exactly one of $2 (token) and $3 (quoted-string) is defined.
      my ($name, $token_value, $quoted_value) = ($1, $2, $3);
      if (defined $token_value) {
        push @type, $name => $token_value;
      } else {
        ## The parameter value is the content of the quoted-string:
        ## drop the enclosing quotation marks, then resolve
        ## quoted-pairs.
        my $v = substr $quoted_value, 1, -1;
        $v =~ s/\\(.)/$1/gs;
        push @type, $name => $v;
      }
    }
    require Whatpm::IMTChecker;
    Whatpm::IMTChecker->check_imt (sub {
      my %opt = @_;
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => 'IMT:'.$opt{type});
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
  }
}; # $HTMLIMTAttrChecker
416    
## Checker for an attribute whose value is a language tag.
##
## The value is checked as an RFC 3066 language tag by
## |Whatpm::LangTag|; its errors are reported with the |LangTag:|
## prefix and, if given, the subtag appended to the type.
##
## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
my $HTMLLanguageTagAttrChecker = sub {
  my ($self, $attr) = @_;
  require Whatpm::LangTag;
  my $report = sub {
    my %opt = @_;
    my $type = 'LangTag:'.$opt{type};
    $type .= ':' . $opt{subtag} if defined $opt{subtag};
    $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
                       level => $opt{level});
  };
  Whatpm::LangTag->check_rfc3066_language_tag ($attr->value, $report);
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
434    
435     ## "A valid media query [MQ]"
## Checker for "a valid media query [MQ]".
##
## Media queries are not checked; the attribute is always reported
## at the |unsupported| level.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr, level => 'unsupported',
                type => 'media query');
  $self->{onerror}->(@report);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
442    
## Checker for event handler attributes.
##
## Script content is not checked; the attribute is always reported
## at the |unsupported| level.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr, level => 'unsupported',
                type => 'event handler');
  $self->{onerror}->(@report);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
453    
## Checker for the |usemap| attribute.
##
## The value MUST be a valid hashed ID reference to a |map| element.
## A value starting with |#| is recorded, without the |#|, in
## |$self->{usemap}| as |[$id => $attr]|; any other value is reported
## as |#idref:syntax error|.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  unless ($value =~ /^#/) {
    $self->{onerror}->(node => $attr, type => '#idref:syntax error');
    return;
  }
  ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is
  ## non-conformant.)
  push @{$self->{usemap}}, [substr ($value, 1) => $attr];
  ## NOTE: Space characters in hashed ID references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only
  ## different in cases should be reported
}; # $HTMLUsemapAttrChecker
467    
## Checker for the |target| attribute (a browsing context name or
## keyword).
##
## A value starting with |_| must be one of |_self|, |_parent| and
## |_top| (compared after lowercasing); anything else starting with
## |_| is reported as |reserved browsing context name|.
##
## NOTE: An empty string is a valid browsing context name (same as
## _self).
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  return unless $value =~ /^_/;

  my %keyword = map { ($_ => 1) } qw(_self _parent _top);
  ## ISSUE: ASCII case-insensitive?
  return if $keyword{lc $value};

  $self->{onerror}->(node => $attr,
                     type => 'reserved browsing context name');
}; # $HTMLTargetAttrChecker
483    
## Checker for an attribute whose value is a group of selectors.
##
## The value is parsed by |Whatpm::CSS::SelectorsParser| with the
## pseudo-classes and pseudo-elements listed below enabled; parse
## errors are reported with the |selectors:| prefix.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  my @pseudo_classes = qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /;
  my @pseudo_elements = qw/
    after before first-letter first-line
  /;

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;
  for my $name (@pseudo_classes) {
    $parser->{pseudo_class}->{$name} = 1;
  }
  for my $name (@pseudo_elements) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{must_level} = $self->{must_level};
  $parser->{onerror} = sub {
    my %opt = @_;
    $opt{type} = 'selectors:'.$opt{type};
    $self->{onerror}->(%opt, node => $attr);
  };
  $parser->parse_string ($attr->value);
}; # $HTMLSelectorsAttrChecker
513    
## Checkers for attributes that are applicable to any HTML element,
## keyed by attribute local name.  Each checker takes |($self, $attr)|
## and reports through |$self->{onerror}|.
my $HTMLAttrChecker = {
  ## |id|: non-empty, unique and free of space characters.  Every
  ## attribute seen for an ID is recorded in |$self->{id}->{$value}|.
  ## NOTE: |map| has its own variant of |id=""| checker
  id => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless (length $value > 0) {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value');
      return;
    }
    my $known = $self->{id}->{$value};
    if ($known) {
      $self->{onerror}->(node => $attr, type => 'duplicate ID');
      push @$known, $attr;
    } else {
      $self->{id}->{$value} = [$attr];
    }
    $self->{onerror}->(node => $attr, type => 'space in ID')
        if $value =~ /[\x09-\x0D\x20]/;
  },

  title => sub {}, ## NOTE: No conformance criteria

  ## |lang|: the empty string or an RFC 3066 language tag; not allowed
  ## in an XML document.
  lang => sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value eq '') {
      require Whatpm::LangTag;
      my $report = sub {
        my %opt = @_;
        my $type = 'LangTag:'.$opt{type};
        $type .= ':' . $opt{subtag} if defined $opt{subtag};
        $self->{onerror}->(node => $attr, type => $type,
                           value => $opt{value}, level => $opt{level});
      };
      Whatpm::LangTag->check_rfc3066_language_tag ($value, $report);
    }
    ## ISSUE: RFC 4646 (3066bis)?
    $self->{onerror}->(node => $attr, type => 'in XML:lang')
        unless $attr->owner_document->manakai_is_html;

    ## TODO: test data
  },

  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),

  ## |class|: set of unique space-separated tokens.  The attribute is
  ## recorded in |$self->{return}->{class}| for each distinct token.
  class => sub {
    my ($self, $attr) = @_;
    my %seen;
    for my $token (split /[\x09-\x0D\x20]/, $attr->value) {
      next unless length $token;
      if ($seen{$token}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
      } else {
        $seen{$token} = 1;
        push @{$self->{return}->{class}->{$token}||=[]}, $attr;
      }
    }
  },

  ## |contextmenu|: the reference is recorded in
  ## |$self->{contextmenu}| as |[$value => $attr]|.
  contextmenu => sub {
    my ($self, $attr) = @_;
    push @{$self->{contextmenu}}, [$attr->value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },

  ## TODO: status: Working Draft
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),

  tabindex => $HTMLIntegerAttrChecker
  ## TODO: ref, template, registrationmark
};
583    
## Standardization status of attributes, keyed by attribute local
## name.

## Status of attributes applicable to any element (no entry yet).
my %HTMLAttrStatus = ();

## Status of the attributes listed here according to XHTML m12n 1.0.
my %HTMLM12NCommonAttrStatus = map { ($_ => FEATURE_M12N10_REC) } qw(
  class dir id
  onclick ondblclick
  onmousedown onmouseup onmouseover onmousemove onmouseout
  onkeypress onkeydown onkeyup
  style title
);
605    
## Register the event handler checker for every event handler
## attribute.
$HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker for qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onsubmit onunload
/;
616    
## Returns a |check_attrs| handler for an element.
##
## |$element_specific_checker| maps attribute local names to checkers
## that take precedence over the common ones in |$HTMLAttrChecker|;
## |$element_specific_status| maps attribute local names to the status
## value passed to |_attr_status_info|.
##
## For each attribute of |$item->{node}|, the handler picks a checker
## (element specific, then common, then |$AttrChecker| for the
## attribute's namespace) and invokes it.  An attribute in the null
## namespace without a checker is reported as |attribute not defined|;
## any other attribute without a checker is reported at the
## |unsupported| level.
my $GetHTMLAttrsChecker = sub {
  my ($element_specific_checker, $element_specific_status) = @_;
  return sub {
    my ($self, $item, $element_state) = @_;
    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;
      my $in_null_ns = $ns eq '';

      my $checker;
      if ($in_null_ns) {
        $checker = $element_specific_checker->{$ln}
            || $HTMLAttrChecker->{$ln};
      }
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};

      if ($checker) {
        $checker->($self, $attr, $item);
      } elsif ($in_null_ns) {
        $self->{onerror}->(node => $attr, level => $self->{must_level},
                           type => 'attribute not defined');
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in
        ## the spec
      }

      $self->_attr_status_info ($attr, $element_specific_status->{$ln})
          if $in_null_ns;
      ## TODO: global attribute
    }
  };
}; # $GetHTMLAttrsChecker
650    
## Base handler set for HTML elements: the handlers of |AnyChecker|
## with |check_attrs| replaced by the HTML attribute checker, using no
## element specific attribute checker.
my %HTMLChecker = (
  %Whatpm::ContentChecker::AnyChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {}, # no element specific checkers
    \%HTMLAttrStatus,
  ),
);
655    
## Handler set for elements whose content model is empty: any child
## element that is not in |plus_elements|, and any significant text,
## is an error.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $type = 'element not allowed:minus';
    } elsif (not $self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      $type = 'element not allowed:empty';
    }
    return unless defined $type;
    $self->{onerror}->(node => $child_el,
                       type => $type,
                       level => $self->{must_level});
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{must_level});
  },
);
682    
## Handler set for elements whose content model is text only: any
## child element that is not in |plus_elements| is an error.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};
    ## NOTE(review): Unlike the other content model checkers, no
    ## |level| is given here; confirm whether that is intended.
    $self->{onerror}->(node => $child_el, type => 'element not allowed');
  },
);
699    
## Handler set for elements whose content model is prose content.
##
## |$element_state->{has_non_style}| records that something other than
## a |style| element has been seen, after which |style| is no longer
## allowed.
my %HTMLProseContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    if ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## |style| must be |scoped| and precede any other content.
      my $allowed = (not $element_state->{has_non_style} and
                     $child_el->has_attribute_ns (undef, 'scoped'));
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:prose style',
                         level => $self->{must_level})
          unless $allowed;
      return;
    }

    if ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
      return;
    }

    $element_state->{has_non_style} = 1;
    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:prose',
                       level => $self->{must_level});
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  ## Propagates |has_significant| to the real parent's state, or
  ## reports a non-transparent element that has no significant content.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      $item->{real_parent_state}->{has_significant} = 1;
      return;
    }
    return if $item->{transparent};
    $self->{onerror}->(node => $item->{node},
                       level => $self->{should_level},
                       type => 'no significant content');
  },
);
746    
## Handler set for elements whose content model is phrasing content.
##
## NOTE: The definition for |li| assumes that the only differences
## between prose and phrasing content checkers are
## |check_child_element| and |check_child_text|.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $type;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $type = 'element not allowed:minus';
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif (not $HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
      $type = 'element not allowed:phrasing';
    }
    return unless defined $type;
    $self->{onerror}->(node => $child_el,
                       type => $type,
                       level => $self->{must_level});
  },
  check_end => $HTMLProseContentChecker{check_end},
);
771    
## The transparent checker is a shallow copy of the prose content checker:
## the same callbacks, held in a separate hash.
my %HTMLTransparentChecker = (%HTMLProseContentChecker);
## ISSUE: Should the significant content rule be applied to a transparent
## element that has a parent?
775 wakaba 1.40
our ($Element, $ElementDefault);

## Definition stored under the empty local name in the HTML namespace
## (presumably the fallback for HTML-namespace elements that have no
## definition of their own -- confirm against the lookup code).  Any
## element checked with it is reported as not defined.
$Element->{$HTML_NS}->{''} = {
  %HTMLChecker,
  check_start => sub {
    my ($self, $item) = @_;
    $self->{onerror}->(node => $item->{node},
                       level => $self->{must_level},
                       type => 'element not defined');
  },
};
787    
## Definition of the HTML |html| element.
##
## The order of its children is tracked in |$element_state->{phase}|,
## which moves from 'before head' to 'after head' to 'after body'.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    ## |xmlns| must be the HTML namespace URI and is reported when the
    ## owner document is not an HTML document.
    xmlns => sub {
      my ($self, $attr) = @_;
      if ($attr->value ne $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value');
      }
      if (not $attr->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
        ## TODO: Test
      }
    },
  }, {
    %HTMLAttrStatus,
    version => FEATURE_M12N10_REC,
    lang => FEATURE_XHTML10_REC,
    id => FEATURE_XHTML10_REC,
    dir => FEATURE_M12N10_REC,
  }),

  check_start => sub {
    my (undef, undef, $element_state) = @_;
    $element_state->{phase} = 'before head';
  },

  ## Accepts one |head| followed by one |body|; everything else that is
  ## not listed in |plus_elements| is reported.
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted without further checks.
    } else {
      my $phase = $element_state->{phase};
      ## Local name of the child if it is an HTML element, '' otherwise.
      my $html_name = $child_nsuri eq $HTML_NS ? $child_ln : '';

      if ($phase eq 'before head') {
        if ($html_name eq 'head') {
          $element_state->{phase} = 'after head';
        } elsif ($html_name eq 'body') {
          $self->{onerror}->(node => $child_el,
                             type => 'ps element missing:head');
          $element_state->{phase} = 'after body';
        } else {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed');
        }
      } elsif ($phase eq 'after head') {
        if ($html_name eq 'body') {
          $element_state->{phase} = 'after body';
        } else {
          $self->{onerror}->(node => $child_el,
                             type => 'element not allowed');
        }
      } elsif ($phase eq 'after body') {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      } else {
        die "check_child_element: Bad |html| phase: $element_state->{phase}";
      }
    }
  },

  ## Significant text directly inside |html| is not allowed.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant;
  },

  ## Reports the required children that were never seen, then runs the
  ## generic end-of-element check.
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Errors to report for each phase the element can end in.
    my %missing_for = (
      'after body' => [],
      'before head' => ['child element missing:head',
                        'child element missing:body'],
      'after head' => ['child element missing:body'],
    );
    my $missing = $missing_for{$element_state->{phase}}
        or die "check_end: Bad |html| phase: $element_state->{phase}";
    for my $type (@$missing) {
      $self->{onerror}->(node => $item->{node},
                         type => $type);
    }

    $HTMLChecker{check_end}->(@_);
  },
};
875 wakaba 1.25
## Definition of the HTML |head| element.
##
## Children must be metadata content: at most one |title| and no |style|
## with a |scoped| attribute.  While the children of |head| are being
## checked, |$self->{flag}->{in_head}| is set to 1; |check_end| restores
## the value the flag had before the first child was seen.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    dir => FEATURE_M12N10_REC,
    id => FEATURE_XHTML10_REC,
    lang => FEATURE_XHTML10_REC,
    profile => FEATURE_HTML5_DROPPED | FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        ## A second |title| in the same |head|.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{must_level});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{must_level});
    }

    ## This callback runs once per child, so the original flag value must
    ## be saved only the first time.  Saving it again would record the 1
    ## set below, and |check_end| would then leave |in_head| set after the
    ## |head| element has ended.  |exists| is used because the saved value
    ## itself may be undefined or false.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:title');
    }
    ## Restore the flag saved by |check_child_element| (undefined if the
    ## |head| had no child element, exactly as before).
    $self->{flag}->{in_head} = $element_state->{in_head_original};

    $HTMLChecker{check_end}->(@_);
  },
};
940    
## Definition of the HTML |title| element: text-only content checked by
## |%HTMLTextChecker|, with no element-specific attribute checkers.
$Element->{$HTML_NS}->{title} = {
  %HTMLTextChecker,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    lang => FEATURE_XHTML10_REC,
    id => FEATURE_XHTML10_REC,
    dir => FEATURE_M12N10_REC,
  }),
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
};
951 wakaba 1.1
## Definition of the HTML |base| element.
##
## Besides the per-attribute checks, |check_attrs| reports a second |base|
## in the document, a |base href| seen after |$self->{has_uri_attr}| was
## set, a |base target| seen after |$self->{has_hyperlink_element}| was
## set, and a |base| that has neither |href| nor |target|.
$Element->{$HTML_NS}->{base} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $base_el = $item->{node};

    if (not $self->{has_base}) {
      $self->{has_base} = 1;
    } else {
      $self->{onerror}->(node => $base_el,
                         type => 'element not allowed:base');
    }

    my $has_href = $base_el->has_attribute_ns (undef, 'href');
    my $has_target = $base_el->has_attribute_ns (undef, 'target');

    ## ISSUE: Are these examples conforming?
    ## <head profile="a b c"><base href> (except for |profile|'s
    ## non-conformance)
    ## <title xml:base="relative"/><base href/> (maybe it should be)
    ## <unknown xmlns="relative"/><base href/> (assuming that
    ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
    ## <style>@import 'relative';</style><base href>
    ## <script>location.href = 'relative';</script><base href>
    ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
    ## an exception.
    $self->{onerror}->(node => $base_el,
                       type => 'basehref after URI attribute')
        if $self->{has_uri_attr} and $has_href;

    ## ISSUE: Are these examples conforming?
    ## <head><title xlink:href=""/><base target="name"/></head>
    ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
    ## (assuming that |xbl:xbl| is allowed before |base|)
    ## NOTE: These are non-conformant anyway because of |head|'s content model:
    ## <link href=""/><base target="name"/>
    ## <link rel=unknown href=""><base target=name>
    $self->{onerror}->(node => $base_el,
                       type => 'basetarget after hyperlink')
        if $self->{has_hyperlink_element} and $has_target;

    $self->{onerror}->(node => $base_el,
                       type => 'attribute missing:href|target')
        unless $has_href or $has_target;

    my $check_each_attr = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    }, {
      %HTMLAttrStatus,
      href => FEATURE_M12N10_REC,
      id => FEATURE_XHTML10_REC,
      target => FEATURE_M12N10_REC,
    });
    return $check_each_attr->($self, $item, $element_state);
  },
};
1010    
## Definition of the HTML |link| element.
##
## The per-attribute checks run first; afterwards |href| and |rel| are
## required, and |$self->{has_hyperlink_element}| is set when the element
## has |href| and |$item->{has_hyperlink_link_type}| is true.
$Element->{$HTML_NS}->{link} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $check_each_attr = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics, it is syntactically
      ## the same as |title| as a global attribute.
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      charset => FEATURE_M12N10_REC,
      href => FEATURE_M12N10_REC,
      hreflang => FEATURE_M12N10_REC,
      lang => FEATURE_XHTML10_REC,
      media => FEATURE_M12N10_REC,
      rel => FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      target => FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC,
    });
    $check_each_attr->($self, $item, $element_state);

    my $link_el = $item->{node};
    unless ($link_el->has_attribute_ns (undef, 'href')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing:href');
    } elsif ($item->{has_hyperlink_link_type}) {
      $self->{has_hyperlink_element} = 1;
    }
    if (not $link_el->has_attribute_ns (undef, 'rel')) {
      $self->{onerror}->(node => $link_el,
                         type => 'attribute missing:rel');
    }
  },
};
1049    
## Definition of the HTML |meta| element.
##
## |check_attrs| does all the work:
##   1. checks each attribute and reports its standardization status;
##   2. enforces that |name|, |http-equiv| and |charset| are mutually
##      exclusive and that |content| is present exactly when required;
##   3. validates |http-equiv| keywords and character encoding
##      declarations (|charset="..."| or
##      |http-equiv="Content-Type" content="text/html; charset=..."|).
$Element->{$HTML_NS}->{meta} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $name_attr;
    my $http_equiv_attr;
    my $charset_attr;
    my $content_attr;

    ## Standardization status of the null-namespace attributes of |meta|.
    ## Built once per element instead of once per attribute.
    my $attr_status = {
      %HTMLAttrStatus,
      content => FEATURE_M12N10_REC,
      dir => FEATURE_M12N10_REC,
      'http-equiv' => FEATURE_M12N10_REC,
      id => FEATURE_XHTML10_REC,
      lang => FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC,
      scheme => FEATURE_M12N10_REC,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      ## |$checker| is a code reference to run, a true non-reference value
      ## for the attributes validated further below, or false if the
      ## attribute is unknown.
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'content') {
          $content_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'name') {
          $name_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'http-equiv') {
          $http_equiv_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'charset') {
          $charset_attr = $attr;
          $checker = 1;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{''};
        }
      } else {
        $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{''};
      }
      if ($checker) {
        $checker->($self, $attr) if ref $checker;
      } elsif ($attr_ns eq '') {
        $self->{onerror}->(node => $attr, level => $self->{must_level},
                           type => 'attribute not defined');
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $attr_status->{$attr_ln});
      }
    }

    if (defined $name_attr) {
      ## |name| excludes both |http-equiv| and |charset|.  Each offending
      ## attribute is reported; when both were present, only |http-equiv|
      ## used to be reported.
      if (defined $http_equiv_attr) {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'attribute not allowed');
      }
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $http_equiv_attr) {
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $charset_attr) {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
      }
    } else {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv');
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv|charset');
      }
    }

    ## Checks where a character encoding declaration appears: it must be
    ## the first element child of the document's |head| and must not be
    ## used when the document is not an HTML document.
    my $check_charset_decl = sub {
      my $parent = $item->{node}->manakai_parent_element;
      if ($parent and $parent eq $parent->owner_document->manakai_head) {
        for my $el (@{$parent->child_nodes}) {
          next unless $el->node_type == 1; # ELEMENT_NODE
          unless ($el eq $item->{node}) {
            ## NOTE: Not the first child element.
            $self->{onerror}->(node => $item->{node},
                               type => 'element not allowed:meta charset',
                               level => $self->{must_level});
          }
          last;
          ## NOTE: Entity references are not supported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'element not allowed:meta charset',
                           level => $self->{must_level});
      }

      unless ($item->{node}->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $item->{node},
                           type => 'in XML:charset',
                           level => $self->{must_level});
      }
    }; # $check_charset_decl

    ## Checks a declared charset name.  |$attr| is the attribute the name
    ## was taken from (used as the error node), |$charset_value| the name.
    my $check_charset = sub {
      my ($attr, $charset_value) = @_;
      ## NOTE: Though the case-sensitivity of the |charset| attribute value
      ## is not explicitly spelled out in the HTML5 spec, the Character Set
      ## registry of IANA, which is referenced from the HTML5 spec, says
      ## that charset names are case-insensitive.
      $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.

      require Message::Charset::Info;
      my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
      my $ic = $item->{node}->owner_document->input_encoding;
      if (defined $ic) {
        ## TODO: Test for this case
        ## NOTE(review): |$ic| is looked up without the case folding that
        ## is applied to |$charset_value|, and either side of the |ne|
        ## comparison may be undefined -- confirm that this is intended.
        my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
        if ($charset ne $ic_charset) {
          $self->{onerror}->(node => $attr,
                             type => 'mismatched charset name:'.$ic.
                                 ':'.$charset_value, ## TODO: This should be a |value| value.
                             level => $self->{must_level});
        }
      } else {
        ## NOTE: MUST, but not checkable, since the document is not originally
        ## in serialized form (or the parser does not preserve the input
        ## encoding information).
        $self->{onerror}->(node => $attr,
                           type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
                           level => 'unsupported');
      }

      ## ISSUE: What is "valid character encoding name"?  Syntactically valid?
      ## Syntactically valid and registered?  What about x-charset names?
      unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
                  ($charset_value)) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:syntax error:'.$charset_value, ## TODO
                           level => $self->{must_level});
      }

      if ($charset) {
        ## ISSUE: What is "the preferred name for that encoding" (for a charset
        ## with no "preferred MIME name" label)?
        my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
        if (($charset_status &
             Message::Charset::Info::PREFERRED_CHARSET_NAME ())
                != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
          $self->{onerror}->(node => $attr,
                             type => 'charset:not preferred:'.
                                 $charset_value, ## TODO
                             level => $self->{must_level});
        }
        if (($charset_status &
             Message::Charset::Info::REGISTERED_CHARSET_NAME ())
                != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
          if ($charset_value =~ /^x-/) {
            $self->{onerror}->(node => $attr,
                               type => 'charset:private:'.$charset_value, ## TODO
                               level => $self->{good_level});
          } else {
            $self->{onerror}->(node => $attr,
                               type => 'charset:not registered:'.
                                   $charset_value, ## TODO
                               level => $self->{good_level});
          }
        }
      } elsif ($charset_value =~ /^x-/) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:private:'.$charset_value, ## TODO
                           level => $self->{good_level});
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'charset:not registered:'.$charset_value, ## TODO
                           level => $self->{good_level});
      }

      if ($attr->get_user_data ('manakai_has_reference')) {
        $self->{onerror}->(node => $attr,
                           type => 'character reference in charset',
                           level => $self->{must_level});
      }
    }; # $check_charset

    ## TODO: metadata conformance

    ## TODO: pragma conformance
    if (defined $http_equiv_attr) { ## An enumerated attribute
      my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
      if ({
        'refresh' => 1,
        'default-style' => 1,
      }->{$keyword}) {
        #

        ## TODO: More than one occurrence is a MUST-level error (revision 1180).
      } elsif ($keyword eq 'content-type') {
        ## ISSUE: Though it is renamed as "Encoding declaration" state in rev
        ## 1221, there are still many occurrences of "Content-Type" state in
        ## the spec.

        $check_charset_decl->();
        if ($content_attr) {
          my $content = $content_attr->value;
          if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
            $check_charset->($content_attr, $1);
          } else {
            $self->{onerror}->(node => $content_attr,
                               type => 'meta content-type syntax error',
                               level => $self->{must_level});
          }
        }
      } else {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'enumerated:invalid');
      }
    }

    if (defined $charset_attr) {
      $check_charset_decl->();
      $check_charset->($charset_attr, $charset_attr->value);
    }
  },
};
1302    
## Definition of the HTML |style| element.
##
## When |type| is absent or is |text/css| (ASCII case-insensitive, with
## optional whitespace around the tokens), child elements are not allowed
## and the collected text is handed to |$self->{onsubdoc}| as a |text/css|
## subdocument.  For any other type, child elements are tolerated and the
## type is reported as unsupported.
$Element->{$HTML_NS}->{style} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is syntactically
    ## not different
  }, {
    %HTMLAttrStatus,
    dir => FEATURE_M12N10_REC,
    id => FEATURE_XHTML10_REC,
    lang => FEATURE_XHTML10_REC,
    media => FEATURE_M12N10_REC,
    title => FEATURE_M12N10_REC,
    type => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## Start with an empty text buffer so that an empty |style| element
    ## hands an empty string, not an undefined value, to |onsubdoc|.
    $element_state->{text} = '';

    ## NOTE: |html:style| itself has no conformance criteria on content model.
    my $type = $item->{node}->get_attribute_ns (undef, 'type');
    if (not defined $type or
        $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
      $element_state->{allow_element} = 0;
      $element_state->{style_type} = 'text/css';
    } else {
      $element_state->{allow_element} = 1; # unknown
      $element_state->{style_type} = $type; ## TODO: $type normalization
    }
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{allow_element}) {
      #
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  ## Accumulates the text content of the element's text children.
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{text} .= $child_node->text_content;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{style_type} eq 'text/css') {
      $self->{onsubdoc}->({s => $element_state->{text},
                           container_node => $item->{node},
                           media_type => 'text/css', is_char_string => 1});
    } else {
      $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                         type => 'style:'.$element_state->{style_type});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1368 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1369 wakaba 1.1
## Definition of the HTML |body| element: prose content, no
## element-specific attribute checkers.
$Element->{$HTML_NS}->{body} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Presentational attributes marked as deprecated.
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/alink background bgcolor link text vlink/),
    lang => FEATURE_XHTML10_REC,
    onload => FEATURE_M12N10_REC,
    onunload => FEATURE_M12N10_REC,
  }),
};
1387    
## |section|, |nav| and |article| share the same definition: prose
## content with the HTML5 default status.  Each element gets a hash of
## its own.
for my $local_name (qw/section nav article/) {
  $Element->{$HTML_NS}->{$local_name} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLProseContentChecker,
  };
}
1402    
## Definition of the HTML |blockquote| element: prose content; |cite| is
## checked as a URI.
$Element->{$HTML_NS}->{blockquote} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLProseContentChecker,
  check_attrs => $GetHTMLAttrsChecker->(
    {cite => $HTMLURIAttrChecker},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      lang => FEATURE_XHTML10_REC,
      cite => FEATURE_M12N10_REC,
    },
  ),
};
1415    
## Definition of the HTML |aside| element: prose content with the HTML5
## default status.
$Element->{$HTML_NS}->{aside} = {
  (status => FEATURE_HTML5_DEFAULT),
  %HTMLProseContentChecker,
};
1420    
## Definition of the HTML |h1| element: phrasing content.  Starting a
## heading sets |$self->{flag}->{has_hn}|, which the |header| definition
## inspects when it ends.
$Element->{$HTML_NS}->{h1} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_start => sub {
    my ($self) = @_;
    $self->{flag}->{has_hn} = 1;
  },
};
1435    
## |h2| to |h6| are shallow copies of the |h1| definition; each element
## gets a hash of its own.
for my $heading (qw/h2 h3 h4 h5 h6/) {
  $Element->{$HTML_NS}->{$heading} = {%{$Element->{$HTML_NS}->{h1}}};
}
1445 wakaba 1.1
1446 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1447    
## Definition of the HTML |header| element: prose content in which
## |header|, |footer| and sectioning content are excluded, and in which
## a heading must have been started (tracked by the |has_hn| flag).
$Element->{$HTML_NS}->{header} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,

  ## Registers the excluded elements and resets the |has_hn| flag, keeping
  ## its previous value in |$element_state->{has_hn_original}|.
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded_here = {$HTML_NS => {qw/header 1 footer 1/}};
    $self->_add_minus_elements ($element_state,
                                $excluded_here,
                                $HTMLSectioningContent);
    $element_state->{has_hn_original} = $self->{flag}->{has_hn};
    $self->{flag}->{has_hn} = 0;
  },

  ## Reports a |header| in which no heading set the flag, then merges the
  ## saved flag value back in.
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    if (not $self->{flag}->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn');
    }
    $self->{flag}->{has_hn} = $element_state->{has_hn_original}
        unless $self->{flag}->{has_hn};

    $HTMLProseContentChecker{check_end}->(@_);
  },
  ## ISSUE: Is <header><del><h1>...</h1></del></header> conforming?
};
1472    
## Definition of the HTML |footer| element: prose content in which
## |footer|, sectioning content and heading content are excluded.
$Element->{$HTML_NS}->{footer} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my @excluded = ({$HTML_NS => {footer => 1}},
                    $HTMLSectioningContent, $HTMLHeadingContent);
    $self->_add_minus_elements ($element_state, @excluded);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Undo the exclusions registered by |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1489    
## Definition of the HTML |address| element: prose content in which
## |footer|, |address|, sectioning content and heading content are
## excluded.
$Element->{$HTML_NS}->{address} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my @excluded = ({$HTML_NS => {footer => 1, address => 1}},
                    $HTMLSectioningContent, $HTMLHeadingContent);
    $self->_add_minus_elements ($element_state, @excluded);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Undo the exclusions registered by |check_start|.
    $self->_remove_minus_elements ($element_state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1511    
## Definition of the HTML |p| element: phrasing content, no
## element-specific attribute checkers.
$Element->{$HTML_NS}->{p} = {
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
    align => FEATURE_M12N10_REC_DEPRECATED,
  }),
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
};
1522    
## Definition of the HTML |hr| element: checked by |%HTMLEmptyChecker|,
## no element-specific attribute checkers.
$Element->{$HTML_NS}->{hr} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Presentational attributes marked as deprecated.
    (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
         qw/align noshade size width/),
    lang => FEATURE_XHTML10_REC,
  }),
};
1536    
## Definition of the HTML |br| element: checked by |%HTMLEmptyChecker|,
## no element-specific attribute checkers.
$Element->{$HTML_NS}->{br} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw/class id title/),
    clear => FEATURE_M12N10_REC_DEPRECATED,
    style => FEATURE_XHTML10_REC,
  }),
  ## NOTE: A blank line MUST NOT be used for presentation purposes.
  ## (This requirement is semantic, so it cannot be checked here.)
};
1551    
## |dialog|: children must alternate |dt|, |dd|, |dt|, |dd|, ...
## |$element_state->{phase}| records which of the two is expected next
## ("before dt" or "before dd").
$Element->{$HTML_NS}->{dialog} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; nothing to check.
    } else {
      ## Transition rule for the current phase: the element that is
      ## expected, the phase it leads to, and the error reported when
      ## the other member of the pair appears instead.
      my $rule = {
        'before dt' => {expected => 'dt', next_phase => 'before dd',
                        misplaced => 'dd',
                        error => 'ps element missing:dt'},
        'before dd' => {expected => 'dd', next_phase => 'before dt',
                        misplaced => 'dt',
                        error => 'ps element missing:dd'},
      }->{$element_state->{phase}}
          or die "check_child_element: Bad |dialog| phase: $element_state->{phase}";

      my $is_html = $child_nsuri eq $HTML_NS;
      if ($is_html and $child_ln eq $rule->{expected}) {
        $element_state->{phase} = $rule->{next_phase};
      } elsif ($is_html and $child_ln eq $rule->{misplaced}) {
        ## The partner element is missing; the phase stays as it is.
        $self->{onerror}->(node => $child_el, type => $rule->{error});
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only insignificant text is tolerated as a direct child.
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## A trailing |dt| without its |dd|.
    if ($element_state->{phase} eq 'before dd') {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:dd');
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1608    
## |pre|: phrasing content; |width| is marked as deprecated.
$Element->{$HTML_NS}->{pre} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
    width => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
1619    
## |ol|: only |li| children (plus whatever |plus_elements| admits) and
## no significant text.  The |ul| definition reuses this table.
$Element->{$HTML_NS}->{ol} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    start => $HTMLIntegerAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_XHTML10_REC,
    start => FEATURE_M12N10_REC_DEPRECATED,
    type => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here.
    } else {
      my $is_li = ($child_nsuri eq $HTML_NS and $child_ln eq 'li');
      $self->{onerror}->(node => $child_el, type => 'element not allowed')
          unless $is_li;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
};
1655    
## |ul|: same content model as |ol| (its table is copied first), with
## its own status and attribute table -- there is no |start| here.
$Element->{$HTML_NS}->{ul} = {
  %{$Element->{$HTML_NS}->{ol}},
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_XHTML10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
1667    
## |li|: prose content, or phrasing content while the |in_menu| flag is
## set on the checker.
$Element->{$HTML_NS}->{li} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    value => sub {
      my ($self, $attr) = @_;
      ## |value| is reported (level "unsupported") when the parent
      ## element exists and is not an HTML |ol|.
      my $parent = $attr->owner_element->manakai_parent_element;
      if (defined $parent) {
        my $ns = $parent->namespace_uri;
        my $ln = $parent->manakai_local_name;
        my $in_ol = (defined $ns and $ns eq $HTML_NS and $ln eq 'ol');
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute')
            unless $in_ol;
      }
      $HTMLIntegerAttrChecker->($self, $attr);
    }, ## TODO: test
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
    value => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $delegate = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
    $delegate->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    my $delegate = $self->{flag}->{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
    $delegate->{check_child_text}->(@_);
  },
};
1711    
## |dl|: one or more |dt| followed by one or more |dd|, repeated.
## |$element_state->{phase}| is "before dt" initially, "in dts" after a
## |dt| and "in dds" after a |dd|.
$Element->{$HTML_NS}->{dl} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    compact => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_XHTML10_REC,
    type => FEATURE_M12N10_REC_DEPRECATED,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here.
    } else {
      my $phase = $element_state->{phase};
      die "check_child_element: Bad |dl| phase: $element_state->{phase}"
          unless {'in dds' => 1, 'in dts' => 1, 'before dt' => 1}->{$phase};

      my $is_html = $child_nsuri eq $HTML_NS;
      if ($is_html and $child_ln eq 'dt') {
        ## A |dt| is acceptable in every phase and always leads to
        ## "in dts".
        $element_state->{phase} = 'in dts';
      } elsif ($is_html and $child_ln eq 'dd') {
        ## A |dd| is only wrong when no |dt| has been seen yet.
        $self->{onerror}->(node => $child_el, type => 'ps element missing:dt')
            if $phase eq 'before dt';
        $element_state->{phase} = 'in dds';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## The list ended with |dt|s that have no |dd|.
    if ($element_state->{phase} eq 'in dts') {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:dd');
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1781    
## |dt|: phrasing content.
$Element->{$HTML_NS}->{dt} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

## |dd|: prose content.
$Element->{$HTML_NS}->{dd} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};
1801    
## |a|: phrasing content.  |$HTMLInteractiveContent| is handed to
## |_add_minus_elements| while the element is open.
##
## |check_attrs| is written out by hand (rather than through
## |$GetHTMLAttrsChecker|) because it has to remember which of the
## link-related attributes were present: without |href| the others are
## not allowed.  It also sets the |in_a_href| flag, which |check_end|
## restores.
$Element->{$HTML_NS}->{a} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Neither table depends on the attribute being examined, so they
    ## are built once per element instead of once per attribute.
    my %own_checker = (
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    );
    my %attr_status = (
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      charset => FEATURE_M12N10_REC,
      coords => FEATURE_M12N10_REC,
      href => FEATURE_M12N10_REC,
      hreflang => FEATURE_M12N10_REC,
      lang => FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      onblur => FEATURE_M12N10_REC,
      onfocus => FEATURE_M12N10_REC,
      rel => FEATURE_M12N10_REC,
      rev => FEATURE_M12N10_REC,
      shape => FEATURE_M12N10_REC,
      tabindex => FEATURE_M12N10_REC,
      target => FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC,
    );

    ## Attribute nodes of the |a|-specific attributes that are present,
    ## keyed by local name.
    my %seen;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;

      ## Checker lookup order: |a|-specific, then generic HTML, then
      ## the namespace-wide table (exact name, then the '' fallback).
      my $checker;
      if ($attr_ns eq '') {
        $checker = $own_checker{$attr_ln};
        if ($checker) {
          $seen{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      if ($checker) {
        ## A true but non-reference entry means "known, nothing to
        ## check".
        $checker->($self, $attr) if ref $checker;
      } elsif ($attr_ns eq '') {
        $self->{onerror}->(node => $attr, level => $self->{must_level},
                           type => 'attribute not defined');
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($attr_ns eq '') {
        $self->_attr_status_info ($attr, $attr_status{$attr_ln});
      }
    }

    ## Remember the outer value so that |check_end| can undo the change.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $seen{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## Without |href| the other link attributes make no sense.
      for my $name (qw/target ping rel media hreflang type/) {
        next unless defined $seen{$name};
        $self->{onerror}->(node => $seen{$name},
                           type => 'attribute not allowed');
      }
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## Undo the |in_a_href| flag set by |check_attrs|.  Previously the
    ## saved value was never consulted, so the flag stayed set for
    ## everything following the first |a href| in the document.
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
1889    
## |q|: phrasing content with a URI-valued |cite| attribute.
##
## |$GetHTMLAttrsChecker| takes two hashes: attribute name => checker,
## and attribute name => status.  The status entries used to be spliced
## into the checker hash (the "}, {" separator was missing), which made
## every attribute with a status look like it had a checker and left
## the status table empty.
$Element->{$HTML_NS}->{q} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    cite => FEATURE_M12N10_REC,
    lang => FEATURE_XHTML10_REC,
  }),
};
1900    
## |cite|, |em|, |strong| and |small|: phrasing content, no
## attribute-specific checkers, identical attribute status tables.

$Element->{$HTML_NS}->{cite} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

$Element->{$HTML_NS}->{em} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

$Element->{$HTML_NS}->{strong} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

$Element->{$HTML_NS}->{small} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

## |mark|: phrasing content; no attribute table of its own, so
## |check_attrs| comes from |%HTMLPhrasingContentChecker|.
$Element->{$HTML_NS}->{mark} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
};
1945    
## |dfn|: phrasing content without nested |dfn|.  |check_start| also
## works out the defined term and reports "duplicate term" when the same
## term was already recorded in |$self->{term}|.
##
## The term is, in order of preference:
##   1. the |title| attribute of the |dfn|;
##   2. the |title| attribute of an HTML |abbr| child, provided the
##      scan below is not cut short by significant text or by a further
##      element child;
##   3. the text content of the |dfn|.
$Element->{$HTML_NS}->{dfn} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    my $dfn = $item->{node};
    my $term = $dfn->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      CHILD: for my $child (@{$dfn->child_nodes}) {
        my $type = $child->node_type;
        if ($type == 1) { # ELEMENT_NODE
          if (defined $term) {
            ## A term was already taken from an earlier |abbr|; another
            ## element child invalidates it.
            undef $term;
            last CHILD;
          }
          next CHILD unless $child->manakai_local_name eq 'abbr';
          my $nsuri = $child->namespace_uri;
          next CHILD unless defined $nsuri and $nsuri eq $HTML_NS;
          my $title = $child->get_attribute_node_ns (undef, 'title');
          $term = $title->value if $title;
        } elsif ($type == 3 or $type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          next CHILD
              if $child->data =~ /\A[\x09-\x0D\x20]+\z/; # Inter-element whitespace
          undef $term;
          last CHILD;
        }
      }
      $term = $dfn->text_content unless defined $term;
    }

    if ($self->{term}->{$term}) {
      $self->{onerror}->(node => $dfn, type => 'duplicate term');
      push @{$self->{term}->{$term}}, $dfn;
    } else {
      $self->{term}->{$term} = [$dfn];
    }
    ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
    ## has |title|.
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2004    
## |abbr|: phrasing content.
$Element->{$HTML_NS}->{abbr} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

## |acronym|: same shape as |abbr|, but its status carries no HTML5
## flag.
$Element->{$HTML_NS}->{acronym} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};
2024    
## |time|: phrasing content.  The date or time is taken from the
## |datetime| attribute when present and from the text content
## otherwise; |check_end| validates it.
##
## Accepted shapes (whitespace optional where shown):
##   YYYY-MM-DD [T] hh:mm[:ss[.fff]] [Z | (+|-)hh:mm]
##   hh:mm[:ss[.fff]]
##
## The characters that may be skipped differ between the two sources
## and are selected through |$reg_sp|.  Earlier the pattern hard-coded
## the attribute variant and |$reg_sp| was assigned but never used, so
## text content was checked with the attribute's whitespace rule.
$Element->{$HTML_NS}->{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # checked in |checker|
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
  }),
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;
    my $reg_sp;
    my $input_node;
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09-\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{Zs}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    ## All problems are reported against |$input_node|.
    my $report = sub {
      $self->{onerror}->(node => $input_node, type => $_[0]);
    };

    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -([0-9]+) # 3
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp
        \z
      )
    /x) {
      ## Name the captures right away.  In the date-and-time form the
      ## first number is the year; in the time-only form it is the hour.
      my ($first, $month, $day, $dt_hour, $dt_minute, $dt_second,
          $tz_hour, $tz_minute, $t_minute, $t_second)
          = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);

      my $hour;
      my $minute;
      my $second;
      if (defined $month) { ## YYYY-MM-DD T? hh:mm
        my $year = $first;
        if (length ($year) != 4 or length ($month) != 2 or
            length ($day) != 2 or
            length ($dt_hour) != 2 or length ($dt_minute) != 2) {
          $report->('dateortime:syntax error');
        }

        if (1 <= $month and $month <= 12) {
          ## Index 0 is a placeholder so that months are 1-based;
          ## February is allowed 29 days here and narrowed down below.
          my $max_day
              = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$month];
          $report->('datetime:bad day') if $day < 1 or $day > $max_day;

          my $is_leap_year
              = ($year % 400 == 0 or ($year % 4 == 0 and $year % 100 != 0));
          $report->('datetime:bad day')
              if $month == 2 and $day == 29 and not $is_leap_year;
        } else {
          $report->('datetime:bad month');
        }

        ($hour, $minute, $second) = ($dt_hour, $dt_minute, $dt_second);

        if (defined $tz_hour) { ## [+-]hh:mm
          if (length ($tz_hour) != 2 or length ($tz_minute) != 2) {
            $report->('dateortime:syntax error');
          }

          $report->('datetime:bad timezone hour') if $tz_hour > 23;
          $report->('datetime:bad timezone minute') if $tz_minute > 59;
        }
      } else { ## hh:mm
        if (length ($first) != 2 or length ($t_minute) != 2) {
          $report->('dateortime:syntax error');
        }

        ($hour, $minute, $second) = ($first, $t_minute, $t_second);
      }

      $report->('datetime:bad hour') if $hour > 23;
      $report->('datetime:bad minute') if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: Integer part of second don't have to have length of two.

        ## The pattern tolerates a bare fraction (".5") so that it can
        ## be reported here rather than failing the whole match.
        if (substr ($second, 0, 1) eq '.') {
          $report->('dateortime:syntax error');
        }

        $report->('datetime:bad second') if $second >= 60;
      }
    } else {
      $report->('dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
2160    
## |meter|: phrasing content.  Each numeric attribute gets its own
## floating-point checker whose range predicate accepts any value.
## TODO: "The recommended way of giving the value is to include it as contents of the element"
$Element->{$HTML_NS}->{meter} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    map {
      ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }))
    } qw/value min low high max optimum/
  }),
};

## |progress|: phrasing content.  |value| must satisfy ">= 0" and |max|
## "> 0" (the predicates given to the floating-point checker).
## TODO: recommended to use content
$Element->{$HTML_NS}->{progress} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    value => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] >= 0 }),
    max => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] > 0 }),
  }),
};
2182    
## |code|, |var|, |samp|, |kbd|, |sub| and |sup|: phrasing content, no
## attribute-specific checkers, identical attribute status tables.

$Element->{$HTML_NS}->{code} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

$Element->{$HTML_NS}->{var} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

$Element->{$HTML_NS}->{samp} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

$Element->{$HTML_NS}->{kbd} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

$Element->{$HTML_NS}->{sub} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

$Element->{$HTML_NS}->{sup} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};
2242    
## |span|: phrasing content.  The |data*| attributes carry the
## |FEATURE_HTML4_REC_RESERVED| status.
$Element->{$HTML_NS}->{span} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    lang => FEATURE_XHTML10_REC,
  }),
};

## |i|: phrasing content.
$Element->{$HTML_NS}->{i} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};

## |b|: phrasing content.
$Element->{$HTML_NS}->{b} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
};
2275    
## |bdo|: phrasing content with a mandatory |dir| attribute.  The
## ordinary attribute check runs first; the presence check follows.
$Element->{$HTML_NS}->{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Built on every call, exactly as before.
    my $check_listed_attrs = $GetHTMLAttrsChecker->({}, {
      %HTMLAttrStatus,
      class => FEATURE_M12N10_REC,
      dir => FEATURE_M12N10_REC,
      id => FEATURE_M12N10_REC,
      style => FEATURE_XHTML10_REC,
      title => FEATURE_M12N10_REC,
      lang => FEATURE_XHTML10_REC,
    });
    $check_listed_attrs->($self, $item, $element_state);

    my $bdo = $item->{node};
    $self->{onerror}->(node => $bdo, type => 'attribute missing:dir')
        unless $bdo->has_attribute_ns (undef, 'dir');
  },
  ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
};
2297    
2298 wakaba 1.49 ## TODO: big, tt: Common lang(xhtml10)
2299    
2300 wakaba 1.29 =pod
2301    
2302     ## TODO:
2303    
2304     +
2305     + <p>Partly because of the confusion described above, authors are
2306     + strongly recommended to always mark up all paragraphs with the
2307     + <code>p</code> element, and to not have any <code>ins</code> or
2308     + <code>del</code> elements that cross across any <span
2309     + title="paragraph">implied paragraphs</span>.</p>
2310     +
2311     (An informative note)
2312    
2313     <p><code>ins</code> elements should not cross <span
2314     + title="paragraph">implied paragraph</span> boundaries.</p>
2315     (normative)
2316    
2317     + <p><code>del</code> elements should not cross <span
2318     + title="paragraph">implied paragraph</span> boundaries.</p>
2319     (normative)
2320    
2321     =cut
2322    
## |ins|: transparent content with a URI-valued |cite| and a
## date-time-valued |datetime| attribute.
##
## The status table now lists |cite| and |datetime|, matching the
## sibling |del| definition.  Both attributes already had checkers here
## but no status entry, so |ins| and |del| were classified differently
## for the same attributes.
$Element->{$HTML_NS}->{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
    datetime => $HTMLDatetimeAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    cite => FEATURE_M12N10_REC,
    datetime => FEATURE_M12N10_REC,
    lang => FEATURE_XHTML10_REC,
  }),
};
2335    
## |del|: transparent content with |cite| and |datetime| attributes.
## Its |check_end| replaces the one from |%HTMLTransparentChecker|: a
## missing significant content is reported at "should" level, and only
## when the item is not marked transparent.
$Element->{$HTML_NS}->{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
    datetime => $HTMLDatetimeAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    cite => FEATURE_M12N10_REC,
    datetime => FEATURE_M12N10_REC,
    lang => FEATURE_XHTML10_REC,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: Significantness flag does not propagate.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
};
2362    
## |figure|: prose content plus exactly one |legend|, which must be
## either the first or the last thing in the element.
##
## State kept in |$element_state|:
##   has_legend_at_first - a |legend| was seen before any other content;
##   has_legend          - the most recent |legend| element seen after
##                         other content (a candidate "last" legend);
##   has_non_legend      - non-legend content was seen since the last
##                         |legend| (or since the start).
$Element->{$HTML_NS}->{figure} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_FD,
  ## NOTE: legend, Prose | Prose, legend
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    my $st = $element_state;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $st->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly allowed here; does not count as content.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      if ($st->{has_legend_at_first}) {
        ## There is already a leading legend; this one is surplus.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
      } elsif ($st->{has_legend}) {
        ## The earlier candidate turned out not to be last: report it
        ## and make this legend the new candidate.
        $self->{onerror}->(node => $st->{has_legend},
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
        $st->{has_legend} = $child_el;
      } elsif ($st->{has_non_legend}) {
        $st->{has_legend} = $child_el;
      } else {
        $st->{has_legend_at_first} = 1;
      }
      delete $st->{has_non_legend};
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $st->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    if ($element_state->{has_legend_at_first}) {
      ## Leading legend: fine.
    } elsif (my $candidate = $element_state->{has_legend}) {
      ## The candidate is only valid if nothing followed it.
      $self->{onerror}->(node => $candidate,
                         type => 'element not allowed:figure legend',
                         level => $self->{must_level})
          if $element_state->{has_non_legend};
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
2425 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2426 wakaba 1.1
## |img|: empty element.  A missing |alt| is reported at |should_level|;
## a missing |src| is reported without an explicit level.
$Element->{$HTML_NS}->{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## |ismap| is reported unless the |in_a_href| flag is set; its value
    ## is then checked as a boolean attribute in either case.
    my $ismap_checker = sub {
      my ($self, $attr, $parent_item) = @_;
      unless ($self->{flag}->{in_a_href}) {
        $self->{onerror}->(node => $attr,
                           type => 'attribute not allowed:ismap');
      }
      $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
    };

    my $attrs_checker = $GetHTMLAttrsChecker->({
      alt => sub { }, ## NOTE: No syntactical requirement
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ismap => $ismap_checker,
      ## TODO: height
      ## TODO: width
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      alt => FEATURE_M12N10_REC,
      border => FEATURE_M12N10_REC_DEPRECATED,
      height => FEATURE_M12N10_REC,
      hspace => FEATURE_M12N10_REC_DEPRECATED,
      ismap => FEATURE_M12N10_REC,
      lang => FEATURE_XHTML10_REC,
      longdesc => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      src => FEATURE_M12N10_REC,
      usemap => FEATURE_M12N10_REC,
      vspace => FEATURE_M12N10_REC_DEPRECATED,
      width => FEATURE_M12N10_REC,
    });
    $attrs_checker->($self, $item);

    my $el = $item->{node};
    unless ($el->has_attribute_ns (undef, 'alt')) {
      $self->{onerror}->(node => $el,
                         type => 'attribute missing:alt',
                         level => $self->{should_level});
    }
    unless ($el->has_attribute_ns (undef, 'src')) {
      $self->{onerror}->(node => $el,
                         type => 'attribute missing:src');
    }
  },
};
2474    
## |iframe|: text-only content; only |src| has a dedicated value checker.
$Element->{$HTML_NS}->{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  ## NOTE: Not part of M12N10 Strict
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    ## Attribute statuses, grouped by status value.
    (map { ($_ => FEATURE_M12N10_REC) } qw/
      class frameborder height id longdesc marginheight marginwidth
      scrolling src title width
    /),
    align => FEATURE_XHTML10_REC,
    name => FEATURE_M12N10_REC_DEPRECATED,
  }),
};
2499    
## |embed|: empty element.  Any attribute in no namespace is accepted;
## |src| and |type| get dedicated value checkers and |src| must be present.
$Element->{$HTML_NS}->{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_src;

    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;

      my $checker;
      if ($ns eq '') {
        if ($ln eq 'src') {
          $has_src = 1;
          $checker = $HTMLURIAttrChecker;
        } elsif ($ln eq 'type') {
          $checker = $HTMLIMTAttrChecker;
        } else {
          ## TODO: height
          ## TODO: width
          $checker = $HTMLAttrChecker->{$ln}
              || sub { }; ## NOTE: Any local attribute is ok.
        }
      }

      ## Fall back to the per-namespace checker, then to the namespace's
      ## catch-all entry (the empty local name).
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};

      unless ($checker) {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for global attributes in the spec
        next;
      }
      $checker->($self, $attr);
    }

    $self->{onerror}->(node => $item->{node},
                       type => 'attribute missing:src')
        unless $has_src;
  },
};
2541    
2542 wakaba 1.49 ## TODO:
2543     ## {applet} FEATURE_M12N10_REC_DEPRECATED
2544     ## class, id, title, alt, archive, code, codebase, height, object, width name style,hspace,vspace(xhtml10)
2545    
## |object|: transparent content model, optionally preceded by |param|
## children.  At least one of |data| and |type| must be specified.
##
## Element state used here:
##   has_non_param   - set once any child other than |param| (or
##                     significant text) has been seen; a later |param|
##                     is then reported.
##   has_significant - read in |check_end|; it is not set in this block.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      data => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ## TODO: width
      ## TODO: height
    }, {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_XHTML10_REC,
      archive => FEATURE_M12N10_REC,
      border => FEATURE_XHTML10_REC,
      classid => FEATURE_M12N10_REC,
      codebase => FEATURE_M12N10_REC,
      codetype => FEATURE_M12N10_REC,
      data => FEATURE_M12N10_REC,
      datafld => FEATURE_HTML4_REC_RESERVED,
      dataformatas => FEATURE_HTML4_REC_RESERVED,
      datasrc => FEATURE_HTML4_REC_RESERVED,
      declare => FEATURE_M12N10_REC,
      height => FEATURE_M12N10_REC,
      hspace => FEATURE_XHTML10_REC,
      lang => FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC,
      standby => FEATURE_M12N10_REC,
      tabindex => FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC,
      usemap => FEATURE_M12N10_REC,
      vspace => FEATURE_XHTML10_REC,
      width => FEATURE_M12N10_REC,
    })->($self, $item);
    ## Neither |data| nor |type| is present.
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type');
      }
    }
  },
  ## NOTE: param*, transparent (Prose)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## A rejected element is still a non-|param| child.  (This used to
      ## set |has_non_legend|, a flag only the |figure| checker reads, so
      ## a |param| following such an element went unreported.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      ## |param| is only allowed before any other content.
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate the flag to the state held in |real_parent_state|.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      ## No parent element and no significant content.
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
2631 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
2632     ## What about |<section><object data><style scoped></style>x</object></section>|?
2633     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
2634 wakaba 1.1
## |param|: empty element; both |name| and |value| must be present.
$Element->{$HTML_NS}->{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Neither attribute has a value checker beyond "anything goes".
    my $attrs_checker = $GetHTMLAttrsChecker->({
      name => sub { },
      value => sub { },
    }, {
      %HTMLAttrStatus,
      id => FEATURE_M12N10_REC,
      name => FEATURE_M12N10_REC,
      type => FEATURE_M12N10_REC,
      value => FEATURE_M12N10_REC,
      valuetype => FEATURE_M12N10_REC,
    });
    $attrs_checker->($self, $item);

    my $el = $item->{node};
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:name')
        unless $el->has_attribute_ns (undef, 'name');
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:value')
        unless $el->has_attribute_ns (undef, 'value');
  },
};
2661    
## |video|: transparent content.  Without a |src| attribute, |source|
## children are allowed before any other content and at least one is
## expected.
##
## Element state used here:
##   allow_source - true while a |source| child is still acceptable.
##   has_source   - -1 if a |source| is expected but none has been seen,
##                  1 once a |source| child has been seen.
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    poster => $HTMLURIAttrChecker, ## TODO: not for audio!
    ## TODO: width, height
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $has_src_attr = $item->{node}->has_attribute_ns (undef, 'src');
    $element_state->{allow_source} = (not $has_src_attr);
    ## -1 marks "a |source| is expected but not yet seen".
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;
    ## NOTE: It might be set true by |check_element|.
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      delete $element_state->{allow_source};
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    if ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
      ## A |source| is reported once |allow_source| is no longer set.
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:prose',
                         level => $self->{must_level})
          unless $element_state->{allow_source};
      $element_state->{has_source} = 1;
      return;
    }

    ## Any other child ends the run of |source| elements.
    delete $element_state->{allow_source};
    $HTMLProseContentChecker{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    delete $element_state->{allow_source} if $has_significant;
    $HTMLProseContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:source',
                         level => $self->{must_level});
    }

    ## The rest of the end-of-element check is shared with |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
2722    
## |audio|: a copy of the |video| definition with its own attribute
## checker, which has no |poster| entry.
$Element->{$HTML_NS}->{audio} = {
  %{ $Element->{$HTML_NS}->{video} },
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
  }),
};
2735    
## |source|: empty element; the |src| attribute must be present.
$Element->{$HTML_NS}->{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    my $attrs_checker = $GetHTMLAttrsChecker->({
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      media => $HTMLMQAttrChecker,
    });
    $attrs_checker->($self, $item, $element_state);

    my $el = $item->{node};
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:src')
        unless $el->has_attribute_ns (undef, 'src');
  },
};
2752    
## |canvas|: transparent content; |height| and |width| are checked as
## non-negative integers with a range callback that always returns true.
$Element->{$HTML_NS}->{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    (map { ($_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })) }
         qw/height width/),
  }),
};
2761    
## |map|: prose content.  The |id| attribute is required; it is checked
## like an ordinary ID and is also recorded in |$self->{map}|.
$Element->{$HTML_NS}->{map} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_id;

    ## NOTE: same as global |id=""|, with |$self->{map}| registration
    my $id_checker = sub {
      my ($self, $attr) = @_;
      my $id = $attr->value;
      if (length ($id) > 0) {
        my $known = $self->{id}->{$id};
        if ($known) {
          $self->{onerror}->(node => $attr, type => 'duplicate ID');
          push @$known, $attr;
        } else {
          $self->{id}->{$id} = [$attr];
        }
      } else {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      }
      $self->{onerror}->(node => $attr, type => 'space in ID')
          if $id =~ /[\x09-\x0D\x20]/;
      ## The first |map| seen with a given ID wins.
      $self->{map}->{$id} ||= $attr;
      $has_id = 1;
    };

    my $attrs_checker = $GetHTMLAttrsChecker->({
      id => $id_checker,
    }, {
      %HTMLAttrStatus,
      class => FEATURE_M12N10_REC,
      dir => FEATURE_M12N10_REC,
      id => FEATURE_M12N10_REC,
      lang => FEATURE_XHTML10_REC,
      name => FEATURE_M12N10_REC_DEPRECATED,
      onclick => FEATURE_M12N10_REC,
      ondblclick => FEATURE_M12N10_REC,
      onmousedown => FEATURE_M12N10_REC,
      onmouseup => FEATURE_M12N10_REC,
      onmouseover => FEATURE_M12N10_REC,
      onmousemove => FEATURE_M12N10_REC,
      onmouseout => FEATURE_M12N10_REC,
      onkeypress => FEATURE_M12N10_REC,
      onkeydown => FEATURE_M12N10_REC,
      onkeyup => FEATURE_M12N10_REC,
      title => FEATURE_M12N10_REC,
    });
    $attrs_checker->($self, $item, $element_state);

    unless ($has_id) {
      $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id');
    }
  },
};
2813    
## |area|: empty element.  Attributes are checked one by one, then the
## combination is validated: hyperlink attributes require |href|, and
## the number and range of |coords| values depend on |shape|.
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my %attr;    ## attributes that have an |area|-specific checker, by name
    my $coords;  ## array ref of |coords| values; undef on syntax error

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $is_local = $attr_ns eq '';

      my $checker;
      if ($is_local) {
        my $area_checkers = {
          alt => sub { },
          ## NOTE: |alt| value has no conformance criteria.
          shape => $GetHTMLEnumeratedAttrChecker->({
            circ => -1, circle => 1,
            default => 1,
            poly => 1, polygon => -1,
            rect => 1, rectangle => -1,
          }),
          coords => sub {
            my ($self, $attr) = @_;
            my $value = $attr->value;
            unless ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
              $self->{onerror}->(node => $attr,
                                 type => 'coords:syntax error');
              return;
            }
            $coords = [split /,/, $value];
          },
          target => $HTMLTargetAttrChecker,
          href => $HTMLURIAttrChecker,
          ping => $HTMLSpaceURIsAttrChecker,
          rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
          media => $HTMLMQAttrChecker,
          hreflang => $HTMLLanguageTagAttrChecker,
          type => $HTMLIMTAttrChecker,
        };
        $checker = $area_checkers->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};

      if ($checker) {
        $checker->($self, $attr) if ref $checker;
      } elsif ($is_local) {
        $self->{onerror}->(node => $attr, level => $self->{must_level},
                           type => 'attribute not defined');
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }

      if ($is_local) {
        my $status_of = {
          %HTMLAttrStatus,
          %HTMLM12NCommonAttrStatus,
          accesskey => FEATURE_M12N10_REC,
          alt => FEATURE_M12N10_REC,
          coords => FEATURE_M12N10_REC,
          href => FEATURE_M12N10_REC,
          lang => FEATURE_XHTML10_REC,
          nohref => FEATURE_M12N10_REC,
          onblur => FEATURE_M12N10_REC,
          onfocus => FEATURE_M12N10_REC,
          shape => FEATURE_M12N10_REC,
          tabindex => FEATURE_M12N10_REC,
          target => FEATURE_M12N10_REC,
        };
        $self->_attr_status_info ($attr, $status_of->{$attr_ln});
      }
    }

    ## Hyperlink-related attributes.
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:alt')
          unless defined $attr{alt};
    } else {
      ## Without |href| none of these may be specified.
      for my $name (qw/target ping rel media hreflang type alt/) {
        next unless defined $attr{$name};
        $self->{onerror}->(node => $attr{$name},
                           type => 'attribute not allowed');
      }
    }

    ## Normalize the |shape| keyword; missing or unknown values are
    ## treated as |rectangle|.
    my $shape = 'rectangle';
    if (defined $attr{shape}) {
      my $canonical_shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      };
      $shape = $canonical_shape->{lc ($attr{shape}->value)} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    ## Validate |coords| against the shape.
    my $coords_attr = $attr{coords};
    if ($shape eq 'default') {
      ## The default shape takes no coordinates at all.
      $self->{onerror}->(node => $coords_attr,
                         type => 'attribute not allowed')
          if defined $coords_attr;
    } elsif (not defined $coords_attr) {
      ## Every other shape requires |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:coords');
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      my $count = @$coords;
      if ($count != 3) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number:3:'.$count);
      } elsif ($coords->[2] < 0) {
        ## The third value must not be negative.
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:out of range:2');
      }
    } elsif ($shape eq 'polygon') {
      my $count = @$coords;
      if ($count < 6) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number:>=6:'.$count);
      } elsif ($count % 2 != 0) {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number:even:'.$count);
      }
    } elsif ($shape eq 'rectangle') {
      my $count = @$coords;
      if ($count == 4) {
        ## Value 0 must be less than value 2, and value 1 less than value 3.
        unless ($coords->[0] < $coords->[2]) {
          $self->{onerror}->(node => $coords_attr,
                             type => 'coords:out of range:0');
        }
        unless ($coords->[1] < $coords->[3]) {
          $self->{onerror}->(node => $coords_attr,
                             type => 'coords:out of range:1');
        }
      } else {
        $self->{onerror}->(node => $coords_attr,
                           type => 'coords:number:4:'.$count);
      }
    }
  },
};
2987     ## TODO: only in map
2988    
## |table|: children are validated with a small state machine kept in
## |$element_state->{phase}|; text children must not be significant.
## At the end of the element the table model is formed with
## |Whatpm::HTMLTable| and the element is recorded in
## |$self->{return}->{table}|.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC_DEPRECATED,
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    border => FEATURE_M12N10_REC,
    cellpadding => FEATURE_M12N10_REC,
    cellspacing => FEATURE_M12N10_REC,
    datafld => FEATURE_HTML4_REC_RESERVED,
    dataformatas => FEATURE_HTML4_REC_RESERVED,
    datapagesize => FEATURE_M12N10_REC,
    datasrc => FEATURE_HTML4_REC_RESERVED,
    frame => FEATURE_M12N10_REC,
    lang => FEATURE_XHTML10_REC,
    rules => FEATURE_M12N10_REC,
    summary => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## Current phase => {HTML child local name => next phase}.  A child
    ## with no entry for the current phase is not allowed there.
    my $next_phase_of = {
      'before caption' => {
        caption => 'in colgroup', colgroup => 'in colgroup',
        thead => 'after thead', tbody => 'in tbodys', tr => 'in trs',
        tfoot => 'in tbodys',
      },
      'in colgroup' => {
        colgroup => 'in colgroup',
        thead => 'after thead', tbody => 'in tbodys', tr => 'in trs',
        tfoot => 'in tbodys',
      },
      'after thead' => {
        tbody => 'in tbodys', tr => 'in trs',
        tfoot => 'in tbodys',
      },
      'in tbodys' => {tbody => 'in tbodys', tfoot => 'after tfoot'},
      'in trs' => {tr => 'in trs', tfoot => 'after tfoot'},
      'after tfoot' => {},
    }->{$element_state->{phase}}
        or die "check_child_element: Bad |table| phase: $element_state->{phase}";

    my $next_phase;
    $next_phase = $next_phase_of->{$child_ln} if $child_nsuri eq $HTML_NS;

    ## A trailing |tfoot| (the transition to 'after tfoot') is only
    ## accepted when no |tfoot| has been seen before.
    undef $next_phase
        if defined $next_phase and $next_phase eq 'after tfoot'
            and $element_state->{has_tfoot};

    unless (defined $next_phase) {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
      return;
    }
    $element_state->{phase} = $next_phase;
    $element_state->{has_tfoot} = 1 if $child_ln eq 'tfoot';
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    my $report_table_error = sub {
      my %opt = @_;
      $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
    };
    Whatpm::HTMLTable->form_table ($item->{node}, $report_table_error);
    push @{$self->{return}->{table}}, $item->{node};

    $HTMLChecker{check_end}->(@_);
  },
};
3112    
## |caption|: phrasing content; no attribute has a dedicated value
## checker, only status information.
$Element->{$HTML_NS}->{caption} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_XHTML10_REC,
    },
  ),
};
3123    
## |colgroup|: only |col| element children are allowed, and text
## children must not be significant.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## |span| must be an integer greater than zero.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
    ## TODO: "attribute not supported" if |col|.
    ## ISSUE: MUST NOT if any |col|?
    ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
  }, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_XHTML10_REC,
    span => FEATURE_M12N10_REC,
    valign => FEATURE_M12N10_REC,
    width => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## Everything except an HTML |col| element is rejected.
    unless ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
};
3166    
## |col|: empty element; |span| must be an integer greater than zero.
$Element->{$HTML_NS}->{col} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC,
      char => FEATURE_M12N10_REC,
      charoff => FEATURE_M12N10_REC,
      lang => FEATURE_XHTML10_REC,
      span => FEATURE_M12N10_REC,
      valign => FEATURE_M12N10_REC,
      width => FEATURE_M12N10_REC,
    },
  ),
};
3184    
## |tbody|: only |tr| element children are allowed, at least one |tr| is
## required, and text children must not be significant.  The |thead| and
## |tfoot| definitions are copies of this one.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    align => FEATURE_M12N10_REC,
    char => FEATURE_M12N10_REC,
    charoff => FEATURE_M12N10_REC,
    lang => FEATURE_XHTML10_REC,
    valign => FEATURE_M12N10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
      ## Remembered for the "at least one |tr|" check in |check_end|.
      $element_state->{has_tr} = 1;
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:tr')
        unless $element_state->{has_tr};

    $HTMLChecker{check_end}->(@_);
  },
};
3228    
## |thead| is a shallow copy of the |tbody| definition, so both share
## the same status value and checker callbacks.
$Element->{$HTML_NS}->{thead} = { %{ $Element->{$HTML_NS}->{tbody} } };
3232    
## |tfoot| is a shallow copy of the |tbody| definition, so both share
## the same status value and checker callbacks.
$Element->{$HTML_NS}->{tfoot} = { %{ $Element->{$HTML_NS}->{tbody} } };
3236    
## |tr|: accepts only |td| and |th| children (plus whatever "plus"
## elements are currently in effect) and requires at least one cell.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    (map { ($_ => FEATURE_M12N10_REC) } qw(align char charoff valign)),
    bgcolor => FEATURE_M12N10_REC_DEPRECATED,
    lang => FEATURE_XHTML10_REC,
  }),
  check_child_element => sub {
    my ($self, $item, $el, $ns, $ln, $is_transparent, $state) = @_;

    ## Explicitly excluded elements are reported at the MUST level.
    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }

    ## Explicitly included elements are accepted silently.
    return if $self->{plus_elements}->{$ns}->{$ln};

    if ($ns eq $HTML_NS and ($ln eq 'td' or $ln eq 'th')) {
      ## Remember the cell so that |check_end| does not complain.
      $state->{has_cell} = 1;
      return;
    }

    $self->{onerror}->(node => $el, type => 'element not allowed');
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    ## Significant text is never allowed directly inside a row.
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:td|th')
        unless $state->{has_cell};

    ## Hand the untouched argument list over to the generic end check.
    $HTMLChecker{check_end}->(@_);
  },
};
3282    
## |td|: content is checked by %HTMLProseContentChecker; |colspan| and
## |rowspan| must be integers greater than zero.
$Element->{$HTML_NS}->{td} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) }
           qw(abbr align axis char charoff colspan headers
              rowspan scope valign)),
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
           qw(bgcolor height nowrap width)),
      lang => FEATURE_XHTML10_REC,
    },
  ),
};
3309    
## |th|: content is checked by %HTMLPhrasingContentChecker; |colspan|
## and |rowspan| must be integers greater than zero, and |scope| is an
## enumerated attribute.
$Element->{$HTML_NS}->{th} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      scope => $GetHTMLEnumeratedAttrChecker->(
        +{ map { ($_ => 1) } qw(row col rowgroup colgroup) },
      ),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) }
           qw(abbr align axis char charoff colspan headers
              rowspan scope valign)),
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) }
           qw(bgcolor height nowrap width)),
      lang => FEATURE_XHTML10_REC,
    },
  ),
};
3338    
3339     ## TODO: forms
3340 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
3341 wakaba 1.1
3342 wakaba 1.49 =pod
3343    
3344     form Common, accept, accept-charset action method enctype target onreset onsubmit name(deprecated) xhtml10.lang
3345     input Common accept accesskey alt checked disabled maxlength name readonly size src tabindex type value usemap ismap onblur onchange onfocus onselect align(deprecated) lang(xhtml10) %reserved
3346     select Common disabled multiple name size tabindex onblur onchange onfocus lang(xhtml10) %reserved
3347     option Common disabled label selected value lang(x10)
3348     textarea Common accesskey cols disabled name readonly rows tabindex onblur onchange onfocus onselect lang(x10) %reserved
3349     button Common accesskey disabled name tabindex type value onblur onfocus lang(x10) %reserved
3350     fieldset Common lang(x10)
3351     label Common accesskey for onblur onfocus lang(xhtml10)
3352     optgroup Common disabled label lang(x10)
3353    
3354     %reserved (html4)
3355     datafld => FEATURE_HTML4_REC_RESERVED,
3356     dataformatas => FEATURE_HTML4_REC_RESERVED,
3357     datasrc => FEATURE_HTML4_REC_RESERVED,
3358    
3359     =cut
3360    
## |script|: with a |src| attribute the element must be empty;
## otherwise the effective script type is derived from |type| and
## |language| and reported as unsupported at the end of the element.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      src => $HTMLURIAttrChecker,
      defer => $GetHTMLBooleanAttrChecker->('defer'),
      async => $GetHTMLBooleanAttrChecker->('async'),
      type => $HTMLIMTAttrChecker,
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) } qw(charset defer src type)),
      (map { ($_ => FEATURE_HTML4_REC_RESERVED) } qw(event for)),
      id => FEATURE_XHTML10_REC,
      language => FEATURE_M12N10_REC_DEPRECATED,
    },
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;
    my $node = $item->{node};

    ## An external script must not have inline content.
    if ($node->has_attribute_ns(undef, 'src')) {
      $state->{must_be_empty} = 1;
      return;
    }

    ## NOTE: No content model conformance in HTML5 spec.
    my $type = $node->get_attribute_ns(undef, 'type');
    my $language = $node->get_attribute_ns(undef, 'language');

    ## An empty |type| or |language| forces the default type; otherwise
    ## |type| wins over |language|, and the default applies when
    ## neither attribute is specified.
    my $has_empty = grep { defined $_ and $_ eq '' } $type, $language;
    my $script_type = $has_empty        ? 'text/javascript'
                    : defined $type     ? $type
                    : defined $language ? 'text/' . $language
                    :                     'text/javascript';

    $state->{script_type} = $script_type; ## TODO: $type normalization
  },
  check_child_element => sub {
    my ($self, $item, $el, $ns, $ln, $is_transparent, $state) = @_;

    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$ns}->{$ln};

    ## Child elements are only an error for an external script.
    $self->{onerror}->(node => $el, type => 'element not allowed')
        if $state->{must_be_empty};
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    return unless $has_significant and $state->{must_be_empty};
    $self->{onerror}->(node => $text_node,
                       type => 'character not allowed');
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    ## Nothing is reported for an external script.
    return if $state->{must_be_empty};

    $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                       type => 'script:' . $state->{script_type});
    ## TODO: text/javascript support

    $HTMLChecker{check_end}->(@_);
  },
};
3438 wakaba 1.25 ## ISSUE: Significant check and text child node
3439 wakaba 1.1
3440     ## NOTE: When script is disabled.
## |noscript|: inside |head| only a restricted set of metadata children
## is allowed; elsewhere it behaves as a transparent element in which
## nested |noscript| elements are excluded.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->({}, {
    %HTMLAttrStatus,
    %HTMLM12NCommonAttrStatus,
    lang => FEATURE_XHTML10_REC,
  }),
  check_start => sub {
    my ($self, $item, $state) = @_;
    my $node = $item->{node};

    ## The element is reported when the owner document is not HTML.
    $self->{onerror}->(node => $node, type => 'in XML:noscript')
        unless $node->owner_document->manakai_is_html;

    ## Outside |head|, forbid nested |noscript| elements.
    $self->_add_minus_elements($state, {$HTML_NS => {noscript => 1}})
        unless $self->{flag}->{in_head};
  },
  check_child_element => sub {
    my ($self, $item, $el, $ns, $ln, $is_transparent, $state) = @_;

    ## Outside |head| the transparent content model applies unchanged.
    return $HTMLTransparentChecker{check_child_element}->(@_)
        unless $self->{flag}->{in_head};

    ## Shared report for children that |head noscript| does not allow.
    my $reject = sub {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:head noscript',
                         level => $self->{must_level});
    };

    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$ns}->{$ln}) {
      ## Explicitly included; nothing to report.
    } elsif ($ns eq $HTML_NS and $ln eq 'link') {
      ## |link| is always acceptable here.
    } elsif ($ns eq $HTML_NS and $ln eq 'style') {
      ## |style| is acceptable unless it carries |scoped|.
      $reject->() if $el->has_attribute_ns(undef, 'scoped');
    } elsif ($ns eq $HTML_NS and $ln eq 'meta') {
      ## |meta| is acceptable only with an |http-equiv| attribute whose
      ## lowercased value is not |content-type|.
      ## TODO: case
      my $http_equiv = $el->get_attribute_node_ns(undef, 'http-equiv');
      $reject->()
          if not $http_equiv or lc ($http_equiv->value) eq 'content-type';
    } else {
      $reject->();
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;

    return $HTMLTransparentChecker{check_child_text}->(@_)
        unless $self->{flag}->{in_head};

    ## Inside |head| significant text is not allowed.
    $self->{onerror}->(node => $text_node,
                       type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    $self->_remove_minus_elements($state);

    ## Pick the end check that matches the context of the element.
    my $base = $self->{flag}->{in_head}
        ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
    $base->{check_end}->(@_);
  },
};
3526 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
3527 wakaba 1.1
## |event-source|: built on %HTMLEmptyChecker; |src| is checked as a
## URI.  No attribute status table is supplied for this element.
$Element->{$HTML_NS}->{'event-source'} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->(
    { src => $HTMLURIAttrChecker },
  ),
};
3535    
## |details|: prose content with a mandatory |legend| that must come
## before any other (non-transparent) child or significant text.
$Element->{$HTML_NS}->{details} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    { open => $GetHTMLBooleanAttrChecker->('open') },
  ),
  ## NOTE: legend, Prose
  check_child_element => sub {
    my ($self, $item, $el, $ns, $ln, $is_transparent, $state) = @_;

    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## An excluded element still counts as content before |legend|.
      $state->{has_non_legend} = 1;
      return;
    }
    return if $self->{plus_elements}->{$ns}->{$ln};

    unless ($ns eq $HTML_NS and $ln eq 'legend') {
      ## Anything else is validated as ordinary prose content.
      $HTMLProseContentChecker{check_child_element}->(@_);
      $state->{has_non_legend} = 1 unless $is_transparent;
      ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
      ## is conforming?
      return;
    }

    ## A |legend| is only valid while nothing else has been seen; a
    ## second |legend| is caught by the same flag.
    $self->{onerror}->(node => $el,
                       type => 'element not allowed:details legend',
                       level => $self->{must_level})
        if $state->{has_non_legend};
    $state->{has_legend} = 1;
    $state->{has_non_legend} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    ## Significant text closes the window in which |legend| may appear.
    $state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'element missing:legend',
                       level => $self->{must_level})
        unless $state->{has_legend};

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<details><legend>aa</legend></details>| error?
  },
};
3587    
## |datagrid|: the content model is tracked with a small state machine
## stored in |$element_state->{phase}|:
##   'any'   - nothing decisive seen yet;
##   'prose' - prose content was chosen;
##   'none'  - a single |table|, |select| or |datalist| was chosen and
##             no further content is allowed.
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      disabled => $GetHTMLBooleanAttrChecker->('disabled'),
      multiple => $GetHTMLBooleanAttrChecker->('multiple'),
    },
  ),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## |a| and nested |datagrid| are excluded from the content.
    $self->_add_minus_elements($element_state,
                               {$HTML_NS => {a => 1, datagrid => 1}});
    $element_state->{phase} = 'any';
  },
  ## Prose -(text* table Prose*) | table | select | datalist | Empty
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Shared report for a child that the current phase does not allow.
    my $reject = sub {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    };

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Explicitly included; nothing to report.
    } elsif ($element_state->{phase} eq 'prose') {
      my $allowed = $HTMLProseContent->{$child_nsuri}->{$child_ln};
      ## A |table| that is the first element child (i.e. preceded by
      ## text only) is not allowed.
      if ($allowed and
          not $element_state->{has_element} and
          $child_nsuri eq $HTML_NS and
          $child_ln eq 'table') {
        $allowed = 0;
      }
      $reject->() unless $allowed;
      $element_state->{has_element} = 1;
    } elsif ($element_state->{phase} eq 'any') {
      my %exclusive = (table => 1, select => 1, datalist => 1);
      if ($child_nsuri eq $HTML_NS and $exclusive{$child_ln}) {
        $element_state->{phase} = 'none';
      } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{has_element} = 1;
        $element_state->{phase} = 'prose';
        ## TODO: transparent?
      } else {
        $reject->();
      }
    } elsif ($element_state->{phase} eq 'none') {
      $reject->();
    } else {
      die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    my $phase = $element_state->{phase};
    if ($phase eq 'prose') {
      ## Text is fine in prose content.
    } elsif ($phase eq 'any') {
      ## Leading text commits the element to prose content.
      $element_state->{phase} = 'prose';
    } else {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements($element_state);

    ## Pick the end check that matches the final phase.
    my $base = $element_state->{phase} eq 'none'
        ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
    $base->{check_end}->(@_);
  },
  ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
  ## are not disallowed (assuming that form control contents are also
  ## prose content).
};
3673    
## |command|: built on %HTMLEmptyChecker, with boolean, URI and
## keyword attribute checkers.
$Element->{$HTML_NS}->{command} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      (map { ($_ => $GetHTMLBooleanAttrChecker->($_)) }
           qw(checked default disabled hidden)),
      icon => $HTMLURIAttrChecker,
      label => sub { }, ## NOTE: No conformance criteria
      radiogroup => sub { }, ## NOTE: No conformance criteria
      ## NOTE: |title| has special semantics, but no syntactical difference
      type => sub {
        my ($self, $attr) = @_;
        ## Only these exact keywords are accepted.
        my %keyword = (command => 1, checkbox => 1, radio => 1);
        $self->{onerror}->(node => $attr,
                           type => 'attribute value not allowed')
            unless $keyword{$attr->value};
      },
    },
  ),
};
3695    
## |menu|: content is either a list of |li| elements or phrasing
## content, never a mixture.  |$state->{phase}| starts as
## 'li or phrasing' and is narrowed to 'li' or 'phrasing' by the first
## decisive child.
$Element->{$HTML_NS}->{menu} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_M12N10_REC_DEPRECATED | FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
      id => sub {
        ## NOTE: same as global |id=""|, with |$self->{menu}| registration
        my ($self, $attr) = @_;
        my $value = $attr->value;

        if (length ($value) > 0) {
          my $seen = $self->{id}->{$value};
          if ($seen) {
            $self->{onerror}->(node => $attr, type => 'duplicate ID');
            push @$seen, $attr;
          } else {
            $self->{id}->{$value} = [$attr];
          }
        } else {
          ## NOTE: MUST contain at least one character
          $self->{onerror}->(node => $attr, type => 'empty attribute value');
        }

        $self->{onerror}->(node => $attr, type => 'space in ID')
            if $value =~ /[\x09-\x0D\x20]/;

        ## The first |menu| registered under a given ID is kept.
        $self->{menu}->{$value} ||= $attr;
        ## ISSUE: <menu id=""><p contextmenu=""> match?
      },
      label => sub { }, ## NOTE: No conformance criteria
      type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
    },
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      compat => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_XHTML10_REC,
    },
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;
    $state->{phase} = 'li or phrasing';
    ## Save the outer flag so that |check_end| can restore it.
    $state->{in_menu_original} = $self->{flag}->{in_menu};
    $self->{flag}->{in_menu} = 1;
  },
  check_child_element => sub {
    my ($self, $item, $el, $ns, $ln, $is_transparent, $state) = @_;

    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$ns}->{$ln};

    ## Classify the child: 'li', 'phrasing', or neither (undef).
    my $kind = ($ns eq $HTML_NS and $ln eq 'li') ? 'li'
             : $HTMLPhrasingContent->{$ns}->{$ln} ? 'phrasing'
             :                                      undef;

    if (defined $kind and $state->{phase} eq 'li or phrasing') {
      ## The first decisive child fixes the content model.
      $state->{phase} = $kind;
    } elsif (not defined $kind or $state->{phase} ne $kind) {
      $self->{onerror}->(node => $el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    return unless $has_significant;

    my $phase = $state->{phase};
    if ($phase eq 'li or phrasing') {
      ## Significant text commits the element to phrasing content.
      $state->{phase} = 'phrasing';
    } elsif ($phase ne 'phrasing') {
      $self->{onerror}->(node => $text_node,
                         type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $state) = @_;
    ## Drop the flag only if the outer context had not set it.
    delete $self->{flag}->{in_menu} unless $state->{in_menu_original};

    ## 'li' uses the generic end check; 'phrasing' and the undecided
    ## 'li or phrasing' use the phrasing one.
    my $base = $state->{phase} eq 'li'
        ? \%HTMLChecker : \%HTMLPhrasingContentChecker;
    $base->{check_end}->(@_);
  },
};
3789    
## |datatemplate|: accepts only |rule| children (plus whatever "plus"
## elements are in effect) and is flagged with |is_xml_root|.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_child_element => sub {
    my ($self, $item, $el, $ns, $ln, $is_transparent, $state) = @_;

    if ($self->{minus_elements}->{$ns}->{$ln}) {
      $self->{onerror}->(node => $el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$ns}->{$ln};
    return if $ns eq $HTML_NS and $ln eq 'rule';

    $self->{onerror}->(node => $el,
                       type => 'element not allowed:datatemplate');
  },
  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    ## Significant text is not allowed directly inside the template.
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $has_significant;
  },
  is_xml_root => 1,
};
3817    
## |rule|: children and text are not checked at all; |nest| is added to
## the "plus" elements for the duration of the element.
$Element->{$HTML_NS}->{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      condition => $HTMLSelectorsAttrChecker,
      mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
    },
  ),
  check_start => sub {
    my ($self, $item, $state) = @_;
    $self->_add_plus_elements($state, {$HTML_NS => {nest => 1}});
  },
  ## Deliberate no-ops: see the NOTE below.
  check_child_element => sub { },
  check_child_text => sub { },
  check_end => sub {
    my ($self, $item, $state) = @_;
    ## Undo the |nest| allowance before running the generic end check.
    $self->_remove_plus_elements($state);
    $HTMLChecker{check_end}->(@_);
  },
  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this.
};
3840    
## |nest|: built on %HTMLEmptyChecker; |filter| is checked as
## selectors and |mode| must be a single token with no space characters.
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      filter => $HTMLSelectorsAttrChecker,
      mode => sub {
        my ($self, $attr) = @_;
        ## Valid values are non-empty and contain none of U+0009..U+000D
        ## or U+0020.
        $self->{onerror}->(node => $attr, type => 'mode:syntax error')
            unless $attr->value =~ /\A[^\x09-\x0D\x20]+\z/;
      },
    },
  ),
};
3855    
## |legend|: content is checked by %HTMLPhrasingContentChecker; no
## attribute has a dedicated syntax checker, only status flags.
$Element->{$HTML_NS}->{legend} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      accesskey => FEATURE_M12N10_REC,
      align => FEATURE_M12N10_REC_DEPRECATED,
      lang => FEATURE_XHTML10_REC,
    },
  ),
};
3867    
## |div|: content is checked by %HTMLProseContentChecker; no attribute
## has a dedicated syntax checker, only status flags.
$Element->{$HTML_NS}->{div} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {},
    {
      %HTMLAttrStatus,
      %HTMLM12NCommonAttrStatus,
      align => FEATURE_M12N10_REC_DEPRECATED,
      (map { ($_ => FEATURE_HTML4_REC_RESERVED) }
           qw(datafld dataformatas datasrc)),
      lang => FEATURE_XHTML10_REC,
    },
  ),
};
3881    
## |font|: content is checked by %HTMLTransparentChecker; attribute
## syntax checkers are still to be written, only status flags exist.
$Element->{$HTML_NS}->{font} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_M12N10_REC,
  check_attrs => $GetHTMLAttrsChecker->(
    {
      ## TODO
    },
    {
      %HTMLAttrStatus,
      (map { ($_ => FEATURE_M12N10_REC) } qw(class dir id title)),
      (map { ($_ => FEATURE_M12N10_REC_DEPRECATED) } qw(color face size)),
      (map { ($_ => FEATURE_XHTML10_REC) } qw(lang style)),
    },
  ),
};
3899 wakaba 1.49
3900     ## TODO: frameset FEATURE_M12N10_REC
3901     ## class title id cols rows onload onunload style(x10)
3902     ## frame frameborder longdesc marginheight marginwidth noresize scrolling src name(deprecated) class,id,title,style(x10)
3903     ## noframes Common, lang(xhtml10)
3904    
3905     ## TODO: deprecated:
3906     ## basefont color face id size
3907     ## center Common lang(xhtml10)
3908     ## dir Common compat lang(xhtml10)
3909     ## isindex class dir id title prompt style(x10) lang(x10)
3910     ## s,strike,u Common xhtml10.lang
3911    
3912     ## TODO: CR: ruby rb rt rp rbc rtc @rbspan
3913 wakaba 1.1
## Set the |loaded| flag for the HTML namespace in the shared
## namespace table (presumably consulted by the core checker before
## loading this module -- confirm against Whatpm::ContentChecker).
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

## A module file must end with a true value.
1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24