/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.48 - (hide annotations) (download)
Sun Feb 24 01:38:36 2008 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.47: +98 -1 lines
++ whatpm/Whatpm/ChangeLog	24 Feb 2008 01:38:04 -0000
	* ContentChecker.pm (check_element): Element standardized
	status information is now dispatched.

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	24 Feb 2008 01:38:30 -0000
	* HTML.pm: Standardized status attributes are added.

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
## Standardized-status flags used in the |status| member of element
## definitions.  Each constant forwards to one of the generic
## |FEATURE_STATUS_*| flags of |Whatpm::ContentChecker|.
##
## NOTE: The generic flags are invoked with explicit parentheses so
## that they are always compiled as subroutine calls.  A bare qualified
## name is rejected by |use strict| if the subroutine has not been
## defined yet when this file is compiled.
##
## NOTE: "At risk", "first draft" and "default" features are currently
## all reported with the working draft status.
sub FEATURE_HTML5_LC () { Whatpm::ContentChecker::FEATURE_STATUS_LC () }
sub FEATURE_HTML5_AT_RISK () { Whatpm::ContentChecker::FEATURE_STATUS_WD () }
sub FEATURE_HTML5_WD () { Whatpm::ContentChecker::FEATURE_STATUS_WD () }
sub FEATURE_HTML5_FD () { Whatpm::ContentChecker::FEATURE_STATUS_WD () }
sub FEATURE_HTML5_DEFAULT () { Whatpm::ContentChecker::FEATURE_STATUS_WD () }
sub FEATURE_WF2 () { Whatpm::ContentChecker::FEATURE_STATUS_LC () }
sub FEATURE_HTML4_REC () { Whatpm::ContentChecker::FEATURE_STATUS_CR () }
14    
## December 2007 HTML5 Classification

## Metadata content.  The table is keyed by namespace URI and then by
## element local name; a true value marks membership.
my $HTMLMetadataContent = {
  $HTML_NS => {
    ## NOTE: A |meta| with no |name| element is not allowed as
    ## a metadata content other than |head| element.
    (map { ($_ => 1) } qw/
      title base link style script noscript
      event-source command datatemplate
      meta
    /),
  },
  ## NOTE: RDF is mentioned in the HTML5 spec.
  ## TODO: Other RDF elements?
  q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
};
29    
## Prose content.  The table is keyed by namespace URI and then by
## element local name; a true value marks membership.
my $HTMLProseContent = {
  $HTML_NS => {
    ## Elements that are prose content only.
    ## ISSUE: |details| and |datagrid| are "Prose element" in spec.
    ## ISSUE: |div| has no category in spec.
    ## NOTE: |style| is only allowed if |scoped| attribute is specified.
    ## Additionally, it must be before any other element or
    ## non-inter-element-whitespace text node.
    (map { ($_ => 1) } qw/
      section nav article blockquote aside
      h1 h2 h3 h4 h5 h6 header
      footer address p hr dialog pre
      ol ul dl figure map table
      details datagrid datatemplate div style
    /),

    ## Elements that are also phrasing content.
    ## ISSUE: |datagrid| is "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    ## NOTE: If there is a |menu| ancestor, |menu| is phrasing.
    ## Otherwise, prose.
    (map { ($_ => 1) } qw/
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo script noscript event-source
      command font a datagrid area
      ins del menu
      img iframe embed object video audio canvas
    /),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},
};
68    
## Sectioning content, keyed by namespace URI and then by local name.
my $HTMLSectioningContent = {
  $HTML_NS => {
    ## NOTE: |body| is only allowed in |html| element.
    (map { ($_ => 1) } qw/section nav article blockquote aside body/),
  },
};
76    
## Heading content, keyed by namespace URI and then by local name.
my $HTMLHeadingContent = {
  $HTML_NS => {
    (map { ($_ => 1) } qw/h1 h2 h3 h4 h5 h6 header/),
  },
};
82    
## Phrasing content, keyed by namespace URI and then by local name.
my $HTMLPhrasingContent = {
  ## NOTE: All phrasing content is also prose content.
  $HTML_NS => {
    ## ISSUE: |datagrid| is "Interactive element" in the spec.
    ## NOTE: |area| is allowed only as a descendant of |map|.
    ## NOTE: |ins| and |del| are transparent.
    ## NOTE: If there is a |menu| ancestor, |menu| is phrasing.
    ## Otherwise, prose.
    (map { ($_ => 1) } qw/
      br q cite em strong small mark
      dfn abbr time progress meter code
      var samp kbd sub sup span i
      b bdo script noscript event-source
      command font a datagrid area
      ins del menu
      img iframe embed object video audio canvas
    /),
  },

  ## NOTE: Embedded
  q<http://www.w3.org/1998/Math/MathML> => {math => 1},
  q<http://www.w3.org/2000/svg> => {svg => 1},

  ## NOTE: And non-inter-element-whitespace text nodes.
};
112    
## $HTMLEmbeddedContent: See Whatpm::ContentChecker.

## Interactive content, keyed by namespace URI and then by local name.
my $HTMLInteractiveContent = {
  $HTML_NS => {
    ## ISSUE: |datagrid| is categorized as "Interactive element"
    (map { ($_ => 1) } qw/a datagrid/),
  },
};

## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
124    
## -- Common attribute syntax checkers
126    
127 wakaba 1.1 our $AttrChecker;
128    
## Returns a checker for an enumerated attribute.
##
## The argument is a hash reference that maps each known keyword (in
## lowercase) to a positive number if the keyword is conforming, or to
## a negative number if it is known but non-conforming.  The returned
## checker takes |($self, $attr)| and reports through
## |$self->{onerror}|.
my $GetHTMLEnumeratedAttrChecker = sub {
  my $states = shift; # {value => conforming ? 1 : -1}
  return sub {
    my ($self, $attr) = @_;
    my $keyword = lc $attr->value; ## TODO: ASCII case insensitivity?
    my $state = $states->{$keyword};
    return if $state > 0; # conforming keyword
    my $type = $state ? 'enumerated:non-conforming' : 'enumerated:invalid';
    $self->{onerror}->(node => $attr, type => $type);
  };
}; # $GetHTMLEnumeratedAttrChecker
143    
## Returns a checker for the boolean attribute whose local name is
## given as the argument.  The only accepted values are the empty
## string and the attribute's own local name (compared case-sensitively).
my $GetHTMLBooleanAttrChecker = sub {
  my $local_name = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    return if $value eq $local_name;
    return if $value eq '';
    $self->{onerror}->(node => $attr, type => 'boolean:invalid');
  };
}; # $GetHTMLBooleanAttrChecker
154    
## Unordered set of unique space-separated tokens.
##
## Reports a |duplicate token:<token>| error for every repeated
## occurrence of a token in the attribute value.
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
  my ($self, $attr) = @_;
  my %seen;
  for my $token (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
    ## The first occurrence is only recorded; later ones are errors.
    next unless $seen{$token}++;
    $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
  }
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
167 wakaba 1.8
## |rel| attribute (unordered set of space separated tokens,
## whose allowed values are defined by the section on link types)
##
## Arguments: |$a_or_area| is the index into the |effect| array of a
## link type definition that applies to the element being checked,
## |$todo| is a hash reference that receives |has_hyperlink_link_type|,
## followed by the usual |$self| and |$attr|.
my $HTMLLinkTypesAttrChecker = sub {
  my ($a_or_area, $todo, $self, $attr) = @_;

  ## Collect the distinct tokens.  A repeated |up| is not an error.
  my %word;
  for my $token (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
    if (not $word{$token}) {
      $word{$token} = 1;
    } elsif ($token ne 'up') {
      $self->{onerror}->(node => $attr, type => 'duplicate token:'.$token);
    }
  }

  ## NOTE: Case sensitive match (since HTML5 spec does not say link
  ## types are case-insensitive and it says "The value should not
  ## be confusingly similar to any other defined value (e.g.
  ## differing only in case).").
  ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
  ## "MAY"s and "only ... MAY" restrict non-standard non-registered
  ## values to be used conformingly.
  require Whatpm::_LinkTypeList;
  our $LinkType;
  for my $word (keys %word) {
    my $def = $LinkType->{$word};
    unless (defined $def) {
      $self->{onerror}->(node => $attr, level => 'unsupported',
                         type => 'link type:'.$word);
      next;
    }

    my $status = $def->{status};
    my $effect = $def->{effect}->[$a_or_area];
    if ($status eq 'accepted' or $status eq 'proposal') {
      if ($status eq 'proposal') {
        $self->{onerror}->(node => $attr, level => 's',
                           type => 'link type:proposed:'.$word);
      }
      unless (defined $effect) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:bad context:'.$word);
      }
    } else { # rejected or synonym
      $self->{onerror}->(node => $attr,
                         type => 'link type:non-conforming:'.$word);
    }

    ## |alternate| is handled after the loop, since its effect depends
    ## on whether |stylesheet| is also specified.
    if (defined $effect and $word ne 'alternate' and
        $effect eq 'hyperlink') {
      $todo->{has_hyperlink_link_type} = 1;
    }

    if ($def->{unique}) {
      if ($self->{has_link_type}->{$word}) {
        $self->{onerror}->(node => $attr,
                           type => 'link type:duplicate:'.$word);
      } else {
        $self->{has_link_type}->{$word} = 1;
      }
    }
  }

  if ($word{alternate} and not $word{stylesheet}) {
    $todo->{has_hyperlink_link_type} = 1;
  }

  ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
  ## says that using both X-Pingback: header field and HTML
  ## <link rel=pingback> is deprecated and if both appears they
  ## SHOULD contain exactly the same value.
  ## ISSUE: Pingback 1.0 specification defines the exact representation
  ## of its link element, which cannot be tested by the current arch.
  ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
  ## include any string that matches to the pattern for the rel=pingback link,
  ## which again impossible to test.
  ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.

  ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
  ## NOTE: We can't check "If the page is part of multiple hierarchies,
  ## then they SHOULD be described in different paragraphs.".
}; # $HTMLLinkTypesAttrChecker

## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."

## URI (or IRI)
##
## Delegates to |Whatpm::URIChecker| and re-reports its errors with a
## |URI::| prefix.  Also records that the document has a URI attribute.
my $HTMLURIAttrChecker = sub {
  my ($self, $attr) = @_;
  ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
  my $report = sub {
    my %opt = @_;
    my $type = 'URI::'.$opt{type};
    $type .= ':'.$opt{position} if defined $opt{position};
    $self->{onerror}->(node => $attr, level => $opt{level}, type => $type);
  };
  Whatpm::URIChecker->check_iri_reference ($attr->value, $report);
  $self->{has_uri_attr} = 1; ## TODO: <html manifest>
}; # $HTMLURIAttrChecker
267    
## A space separated list of one or more URIs (or IRIs)
##
## Every item is checked by |Whatpm::URIChecker|; its errors are
## re-reported with a |URIs::| prefix and the zero-based item index.
my $HTMLSpaceURIsAttrChecker = sub {
  my ($self, $attr) = @_;
  my $index = 0;
  my $report = sub {
    my %opt = @_;
    my $type = 'URIs:'.':'.$opt{type}.':'.$index;
    $type .= ':'.$opt{position} if defined $opt{position};
    $self->{onerror}->(node => $attr, level => $opt{level}, type => $type);
  };
  for my $uri (split /[\x09-\x0D\x20]+/, $attr->value) {
    Whatpm::URIChecker->check_iri_reference ($uri, $report);
    $index++;
  }
  ## ISSUE: Relative references?
  ## ISSUE: Leading or trailing white spaces are conformant?
  ## ISSUE: A sequence of white space characters are conformant?
  ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
  ## NOTE: Duplication seems not an error.
  $self->{has_uri_attr} = 1;
}; # $HTMLSpaceURIsAttrChecker
289    
## Date and time with a time zone.
##
## Reports |datetime:syntax error| if the value does not have the
## expected form; otherwise reports one error for every component
## (day, month, hour, minute, second, time zone) that is out of range.
my $HTMLDatetimeAttrChecker = sub {
  my ($self, $attr) = @_;
  my $report = sub {
    $self->{onerror}->(node => $attr, type => $_[0]);
  };

  ## ISSUE: "space", not "space character" (in parsing algorithm, "space character")
  my @field = $attr->value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/;
  unless (@field) {
    $report->('datetime:syntax error');
    return;
  }
  my ($year, $month, $day, $hour, $minute, $second, $fraction,
      $tz_hour, $tz_minute) = @field;

  if ($month < 1 or $month > 12) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
    $report->('datetime:bad month');
  } else {
    ## The longest possible length of each month, indexed by month.
    my @last_day = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);
    my $is_leap_year = ($year % 400 == 0 or
                        ($year % 4 == 0 and $year % 100 != 0));
    $report->('datetime:bad day')
        if $day < 1 or $day > $last_day[$month];
    $report->('datetime:bad day')
        if $month == 2 and $day == 29 and not $is_leap_year;
  }

  $report->('datetime:bad hour') if $hour > 23;
  $report->('datetime:bad minute') if $minute > 59;
  $report->('datetime:bad second') if defined $second and $second > 59;
  ## NOTE: The time zone fields are undefined for the |Z| form.
  $report->('datetime:bad timezone hour') if $tz_hour > 23;
  $report->('datetime:bad timezone minute') if $tz_minute > 59;
  ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
}; # $HTMLDatetimeAttrChecker
320    
## Integer: one or more ASCII digits, optionally preceded by "-".
my $HTMLIntegerAttrChecker = sub {
  my ($self, $attr) = @_;
  return if $attr->value =~ /\A-?[0-9]+\z/;
  $self->{onerror}->(node => $attr, type => 'integer:syntax error');
}; # $HTMLIntegerAttrChecker
328    
## Returns a checker for a non-negative integer attribute.
##
## The argument is a code reference that receives the numeric value
## and returns true if the value is within the allowed range.
my $GetHTMLNonNegativeIntegerAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    unless ($value =~ /\A[0-9]+\z/) {
      $self->{onerror}->(node => $attr,
                         type => 'nninteger:syntax error');
      return;
    }
    return if $range_check->($value + 0);
    $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
  };
}; # $GetHTMLNonNegativeIntegerAttrChecker
344    
## Returns a checker for a floating point number attribute.
##
## The argument is a code reference that receives the numeric value
## and returns true if the value is within the allowed range.
##
## A valid value consists of one or more ASCII digits with at most a
## single "." before, between, or after them, optionally preceded by
## "-".  Values with more than one "." (such as "1.2.3") are reported
## as syntax errors instead of being numified and range-checked.
my $GetHTMLFloatingPointNumberAttrChecker = sub {
  my $range_check = shift;
  return sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    ## Either digits with an optional "." and optional fraction
    ## digits, or a leading "." followed by digits.
    if ($value =~ /\A-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)\z/) {
      unless ($range_check->($value + 0)) {
        $self->{onerror}->(node => $attr, type => 'float:out of range');
      }
    } else {
      $self->{onerror}->(node => $attr,
                         type => 'float:syntax error');
    }
  };
}; # $GetHTMLFloatingPointNumberAttrChecker
360    
## "A valid MIME type, optionally with parameters. [RFC 2046]"
## ISSUE: RFC 2046 does not define syntax of media types.
## ISSUE: The definition of "a valid MIME type" is unknown.
## Syntactical correctness?
##
## Parses the value as |type/subtype| followed by |;name=value|
## parameters and passes the flat list (type, subtype, name, value,
## ...) to |Whatpm::IMTChecker|, whose errors are re-reported with an
## |IMT:| prefix.  An unparsable value is an |IMT:syntax error|.
my $HTMLIMTAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## ISSUE: RFC 2045 Content-Type header field allows insertion
  ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
  ## ISSUE: RFC 2231 extension? Maybe no.
  my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
  my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
  my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
  if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
    my @type = ($1, $2);
    my $param = $3;
    while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
      ## Copy the captures before any other pattern match resets them.
      my ($name, $token_value, $quoted_value) = ($1, $2, $3);
      if (defined $token_value) {
        push @type, $name => $token_value;
      } else {
        ## The value is a quoted-string: drop the enclosing quotation
        ## marks and then resolve the quoted-pairs.
        my $v = substr ($quoted_value, 1, length ($quoted_value) - 2);
        $v =~ s/\\(.)/$1/gs;
        push @type, $name => $v;
      }
    }
    require Whatpm::IMTChecker;
    Whatpm::IMTChecker->check_imt (sub {
      my %opt = @_;
      $self->{onerror}->(node => $attr, level => $opt{level},
                         type => 'IMT:'.$opt{type});
    }, @type);
  } else {
    $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
  }
}; # $HTMLIMTAttrChecker
397    
## Language tag.  Delegates to |Whatpm::LangTag| and re-reports its
## errors with a |LangTag:| prefix (followed by the subtag name, if
## one is given).
my $HTMLLanguageTagAttrChecker = sub {
  ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.

  my ($self, $attr) = @_;
  my $report = sub {
    my %opt = @_;
    my $type = 'LangTag:'.$opt{type};
    $type .= ':' . $opt{subtag} if defined $opt{subtag};
    $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
                       level => $opt{level});
  };
  require Whatpm::LangTag;
  Whatpm::LangTag->check_rfc3066_language_tag ($attr->value, $report);
  ## ISSUE: RFC 4646 (3066bis)?

  ## TODO: testdata
}; # $HTMLLanguageTagAttrChecker
415    
## "A valid media query [MQ]"
##
## Media queries are not checked; every use is reported as unsupported.
my $HTMLMQAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr, level => 'unsupported',
                type => 'media query');
  $self->{onerror}->(@report);
  ## ISSUE: What is "a valid media query"?
}; # $HTMLMQAttrChecker
423    
## Event handler attribute.  The script is not checked; every use is
## reported as unsupported.
my $HTMLEventHandlerAttrChecker = sub {
  my ($self, $attr) = @_;
  my @report = (node => $attr, level => 'unsupported',
                type => 'event handler');
  $self->{onerror}->(@report);
  ## TODO: MUST contain valid ECMAScript code matching the
  ## ECMAScript |FunctionBody| production. [ECMA262]
  ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
  ## ISSUE: Automatic semicolon insertion does not apply?
  ## ISSUE: Other script languages?
}; # $HTMLEventHandlerAttrChecker
434    
## |usemap| attribute.  The referenced name (the value without the
## leading "#") is queued in |$self->{usemap}| together with the
## attribute node.
my $HTMLUsemapAttrChecker = sub {
  my ($self, $attr) = @_;
  ## MUST be a valid hashed ID reference to a |map| element
  my $value = $attr->value;
  unless (substr ($value, 0, 1) eq '#') {
    $self->{onerror}->(node => $attr, type => '#idref:syntax error');
    return;
  }
  ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
  push @{$self->{usemap}}, [substr ($value, 1) => $attr];
  ## NOTE: Space characters in hashed ID references are conforming.
  ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
}; # $HTMLUsemapAttrChecker
448    
## Browsing context name or keyword.  A value that begins with "_"
## must be one of the known keywords.
my $HTMLTargetAttrChecker = sub {
  my ($self, $attr) = @_;
  my $value = $attr->value;
  ## NOTE: An empty string is a valid browsing context name (same as _self).
  return unless $value =~ /^_/;

  my %keyword = (_self => 1, _parent => 1, _top => 1);
  return if $keyword{lc $value}; ## ISSUE: ASCII case-insensitive?
  $self->{onerror}->(node => $attr,
                     type => 'reserved browsing context name');
}; # $HTMLTargetAttrChecker
464    
## Group of selectors.  The value is parsed by
## |Whatpm::CSS::SelectorsParser|; parse errors are re-reported with a
## |selectors:| prefix.
my $HTMLSelectorsAttrChecker = sub {
  my ($self, $attr) = @_;

  ## ISSUE: Namespace resolution?

  require Whatpm::CSS::SelectorsParser;
  my $parser = Whatpm::CSS::SelectorsParser->new;

  ## Pseudo-classes the parser is told to accept.
  for my $name (qw/
    active checked disabled empty enabled first-child first-of-type
    focus hover indeterminate last-child last-of-type link only-child
    only-of-type root target visited
    lang nth-child nth-last-child nth-of-type nth-last-of-type not
    -manakai-contains -manakai-current
  /) {
    $parser->{pseudo_class}->{$name} = 1;
  }

  ## Pseudo-elements the parser is told to accept.
  for my $name (qw/
    after before first-letter first-line
  /) {
    $parser->{pseudo_element}->{$name} = 1;
  }

  $parser->{must_level} = $self->{must_level};
  $parser->{onerror} = sub {
    my %opt = @_;
    $opt{type} = 'selectors:'.$opt{type};
    $self->{onerror}->(%opt, node => $attr);
  };
  $parser->parse_string ($attr->value);
}; # $HTMLSelectorsAttrChecker
494    
## Checkers for the attributes that are common to all HTML elements,
## keyed by attribute local name.
my $HTMLAttrChecker = {
  id => sub {
    ## NOTE: |map| has its own variant of |id=""| checker
    my ($self, $attr) = @_;
    my $id = $attr->value;
    unless (length $id > 0) {
      ## NOTE: MUST contain at least one character
      $self->{onerror}->(node => $attr, type => 'empty attribute value');
      return;
    }

    ## |$self->{id}| maps each ID to the list of attributes using it.
    my $users = $self->{id}->{$id};
    if ($users) {
      $self->{onerror}->(node => $attr, type => 'duplicate ID');
      push @$users, $attr;
    } else {
      $self->{id}->{$id} = [$attr];
    }

    if ($id =~ /[\x09-\x0D\x20]/) {
      $self->{onerror}->(node => $attr, type => 'space in ID');
    }
  },
  title => sub {}, ## NOTE: No conformance criteria
  lang => sub {
    my ($self, $attr) = @_;
    my $tag = $attr->value;
    ## An empty value is allowed and is not checked as a language tag.
    unless ($tag eq '') {
      require Whatpm::LangTag;
      Whatpm::LangTag->check_rfc3066_language_tag ($tag, sub {
        my %opt = @_;
        my $type = 'LangTag:'.$opt{type};
        $type .= ':' . $opt{subtag} if defined $opt{subtag};
        $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
                           level => $opt{level});
      });
    }
    ## ISSUE: RFC 4646 (3066bis)?
    unless ($attr->owner_document->manakai_is_html) {
      $self->{onerror}->(node => $attr, type => 'in XML:lang');
    }

    ## TODO: test data
  },
  dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
  class => sub {
    my ($self, $attr) = @_;
    my %seen;
    for my $name (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
      if ($seen{$name}) {
        $self->{onerror}->(node => $attr, type => 'duplicate token:'.$name);
        next;
      }
      $seen{$name} = 1;
      ## Record the attributes that use each class name.
      push @{$self->{return}->{class}->{$name}||=[]}, $attr;
    }
  },
  contextmenu => sub {
    my ($self, $attr) = @_;
    ## Only queued here, as a pair of the referenced ID and the node.
    push @{$self->{contextmenu}}, [$attr->value => $attr];
    ## ISSUE: "The value must be the ID of a menu element in the DOM."
    ## What is "in the DOM"?  A menu Element node that is not part
    ## of the Document tree is in the DOM?  A menu Element node that
    ## belong to another Document tree is in the DOM?
  },
  irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'), ## TODO: status: Working Draft
  tabindex => $HTMLIntegerAttrChecker
  ## TODO: ref, template, registrationmark
};

564    
## Register the event handler content attributes, which are common to
## all HTML elements and share a single checker.
$HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker for qw/
  onabort onbeforeunload onblur onchange onclick oncontextmenu
  ondblclick ondrag ondragend ondragenter ondragleave ondragover
  ondragstart ondrop onerror onfocus onkeydown onkeypress
  onkeyup onload onmessage onmousedown onmousemove onmouseout
  onmouseover onmouseup onmousewheel onresize onscroll onselect
  onsubmit onunload
/;
575    
## Returns a |check_attrs| callback for an HTML element.
##
## The argument is a hash reference of element-specific attribute
## checkers, keyed by local name.  For an attribute in no namespace
## the element-specific checker is preferred, followed by the common
## HTML checker.  Otherwise (or if neither exists) the checker
## registered in |$AttrChecker| for the namespace and local name, or
## for the namespace with the empty local name, is used.  An attribute
## without any checker is reported as unsupported.
my $GetHTMLAttrsChecker = sub {
  my $element_specific_checker = shift;
  return sub {
    my ($self, $item, $element_state) = @_;
    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      $ns = '' unless defined $ns;
      my $ln = $attr->manakai_local_name;

      my $checker = $ns eq ''
          ? ($element_specific_checker->{$ln} || $HTMLAttrChecker->{$ln})
          : undef;
      $checker ||= $AttrChecker->{$ns}->{$ln}
          || $AttrChecker->{$ns}->{''};

      unless ($checker) {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
        next;
      }
      $checker->($self, $attr, $item);
    }
  };
}; # $GetHTMLAttrsChecker
601    
## Base definition of HTML elements: the generic callbacks of
## |%Whatpm::ContentChecker::AnyChecker|, with an attribute checker
## that knows only the common HTML attributes.
my %HTMLChecker = (%Whatpm::ContentChecker::AnyChecker);
$HTMLChecker{check_attrs} = $GetHTMLAttrsChecker->({});
606    
## Definition of elements whose content model is empty: no child
## element (other than those in |plus_elements|) and no significant
## text is allowed.
my %HTMLEmptyChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};
    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:empty',
                       level => $self->{must_level});
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed:empty',
                       level => $self->{must_level});
  },
);
633    
## Definition of elements whose content model is text: no child
## element (other than those in |plus_elements|) is allowed.
my %HTMLTextChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};
    $self->{onerror}->(node => $child_el, type => 'element not allowed');
  },
);
650    
## Definition of elements whose content model is prose content.
##
## |$element_state->{has_non_style}| records that an element other
## than |style| or significant text has been seen, after which a
## |style| child is no longer allowed.
my %HTMLProseContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    if ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      ## |style| must be scoped and must precede any other content.
      my $allowed = (not $element_state->{has_non_style} and
                     $child_el->has_attribute_ns (undef, 'scoped'));
      unless ($allowed) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
      $element_state->{has_non_style} = 1 unless $child_is_transparent;
    } else {
      $element_state->{has_non_style} = 1;
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:prose',
                         level => $self->{must_level});
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_style} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate the flag to the state of the real parent.
      $item->{real_parent_state}->{has_significant} = 1;
      return;
    }
    return if $item->{transparent};
    $self->{onerror}->(node => $item->{node},
                       level => $self->{should_level},
                       type => 'no significant content');
  },
);
697    
## Definition of elements whose content model is phrasing content.
my %HTMLPhrasingContentChecker = (
  %HTMLChecker,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};
    return if $HTMLPhrasingContent->{$child_nsuri}->{$child_ln};
    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:phrasing',
                       level => $self->{must_level});
  },
  ## The significant content rule is shared with prose content.
  check_end => $HTMLProseContentChecker{check_end},
  ## NOTE: The definition for |li| assumes that the only differences
  ## between prose and phrasing content checkers are |check_child_element|
  ## and |check_child_text|.
);
722    
## Definition of transparent elements: currently a copy of the prose
## content definition.
## ISSUE: Significant content rule should be applied to transparent element
## with parent?
my %HTMLTransparentChecker = (%HTMLProseContentChecker);

our $Element;
our $ElementDefault;

## Definition used for HTML elements that have no entry of their own:
## the base HTML callbacks with the default |check_start|.
$Element->{$HTML_NS}->{''} = {
  %HTMLChecker,
  check_start => $ElementDefault->{check_start},
};
734    
## The |html| element: a |head| element followed by a |body| element.
##
## |$element_state->{phase}| tracks the position within the content
## model: 'before head', 'after head', or 'after body'.
$Element->{$HTML_NS}->{html} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  is_root => 1,
  check_attrs => $GetHTMLAttrsChecker->({
    manifest => $HTMLURIAttrChecker,
    xmlns => sub {
      my ($self, $attr) = @_;
      if ($attr->value ne $HTML_NS) {
        $self->{onerror}->(node => $attr, type => 'invalid attribute value');
      }
      unless ($attr->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
        ## TODO: Test
      }
    },
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before head';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    my $phase = $element_state->{phase};
    my $is_head = ($child_nsuri eq $HTML_NS and $child_ln eq 'head');
    my $is_body = ($child_nsuri eq $HTML_NS and $child_ln eq 'body');
    if ($phase eq 'before head') {
      if ($is_head) {
        $element_state->{phase} = 'after head';
      } elsif ($is_body) {
        ## |body| without a preceding |head|.
        $self->{onerror}->(node => $child_el,
                           type => 'ps element missing:head');
        $element_state->{phase} = 'after body';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      }
    } elsif ($phase eq 'after head') {
      if ($is_body) {
        $element_state->{phase} = 'after body';
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      }
    } elsif ($phase eq 'after body') {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    } else {
      die "check_child_element: Bad |html| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed');
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    my $phase = $element_state->{phase};
    unless ($phase eq 'after body') {
      ## Report every required child element that has not been seen.
      my @missing = $phase eq 'before head' ? ('head', 'body')
                  : $phase eq 'after head'  ? ('body')
                  : die "check_end: Bad |html| phase: $element_state->{phase}";
      for my $name (@missing) {
        $self->{onerror}->(node => $item->{node},
                           type => 'child element missing:'.$name);
      }
    }

    $HTMLChecker{check_end}->(@_);
  },
};
816 wakaba 1.25
## |head|: a container for metadata content.
##
## Child elements must be metadata content (|$HTMLMetadataContent|), with
## two additional rules: at most one |title| child is allowed (and one is
## required, see |check_end|), and a |style| child must not have the
## |scoped| attribute.  Significant text is not allowed.
##
## While children are being checked, |$self->{flag}->{in_head}| is set to
## true.  The value the flag had before the first child is remembered in
## |$element_state->{in_head_original}| and put back by |check_end|.
$Element->{$HTML_NS}->{head} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => $GetHTMLAttrsChecker->({}),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
      unless ($element_state->{has_title}) {
        $element_state->{has_title} = 1;
      } else {
        ## Second or later |title|.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head title',
                           level => $self->{must_level});
      }
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
      if ($child_el->has_attribute_ns (undef, 'scoped')) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:head style',
                           level => $self->{must_level});
      }
    } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
      #

      ## NOTE: |meta| is a metadata content.  However, strictly speaking,
      ## a |meta| element with none of |charset|, |name|,
      ## or |http-equiv| attribute is not allowed.  It is non-conforming
      ## anyway.
    } else {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:metadata',
                         level => $self->{must_level});
    }

    ## Remember the outer flag value only once.  Saving it again for every
    ## child would record the value 1 set below for the previous child, so
    ## |check_end| could never restore the real outer value.
    unless (exists $element_state->{in_head_original}) {
      $element_state->{in_head_original} = $self->{flag}->{in_head};
    }
    $self->{flag}->{in_head} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    unless ($element_state->{has_title}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:title');
    }

    ## Restore the flag only if this element changed it.  A |head| with no
    ## child element never touched the flag and must not reset it.
    if (exists $element_state->{in_head_original}) {
      $self->{flag}->{in_head} = $element_state->{in_head_original};
    }

    $HTMLChecker{check_end}->(@_);
  },
};
875    
## |title|: all callbacks come from |%HTMLTextChecker|.
$Element->{$HTML_NS}{title} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLTextChecker,
};
880 wakaba 1.1
## |base|: an empty element; content callbacks come from
## |%HTMLEmptyChecker|.
##
## |check_attrs| enforces the document-wide rules before delegating the
## per-attribute checks:
##   - only one |base| per checker run (|$self->{has_base}|);
##   - |href| must not appear once |$self->{has_uri_attr}| is set;
##   - |target| must not appear once |$self->{has_hyperlink_element}| is
##     set;
##   - at least one of |href| and |target| is required.
$Element->{$HTML_NS}{base} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};

    if ($self->{has_base}) {
      $self->{onerror}->(node => $node,
                         type => 'element not allowed:base');
    } else {
      $self->{has_base} = 1;
    }

    my $has_href = $node->has_attribute_ns (undef, 'href');
    my $has_target = $node->has_attribute_ns (undef, 'target');

    if ($self->{has_uri_attr} and $has_href) {
      ## ISSUE: Are these examples conforming?
      ## <head profile="a b c"><base href> (except for |profile|'s
      ## non-conformance)
      ## <title xml:base="relative"/><base href/> (maybe it should be)
      ## <unknown xmlns="relative"/><base href/> (assuming that
      ## |{relative}:unknown| is allowed before XHTML |base| (unlikely,
      ## though))
      ## <style>@import 'relative';</style><base href>
      ## <script>location.href = 'relative';</script><base href>
      ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
      ## an exception.
      $self->{onerror}->(node => $node,
                         type => 'basehref after URI attribute');
    }
    if ($self->{has_hyperlink_element} and $has_target) {
      ## ISSUE: Are these examples conforming?
      ## <head><title xlink:href=""/><base target="name"/></head>
      ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
      ## (assuming that |xbl:xbl| is allowed before |base|)
      ## NOTE: These are non-conformant anyway because of |head|'s content
      ## model:
      ## <link href=""/><base target="name"/>
      ## <link rel=unknown href=""><base target=name>
      $self->{onerror}->(node => $node,
                         type => 'basetarget after hyperlink');
    }

    unless ($has_href or $has_target) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:href|target');
    }

    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      target => $HTMLTargetAttrChecker,
    });
    return $attrs_checker->($self, $item, $element_state);
  },
};
934    
## |link|: an empty element; content callbacks come from
## |%HTMLEmptyChecker|.
##
## |check_attrs| first runs the per-attribute checkers, then requires both
## |href| and |rel|.  When |href| is present and the |rel| checker has set
## |$item->{has_hyperlink_link_type}|, the document-wide
## |$self->{has_hyperlink_element}| flag is raised (it is consulted by the
## |base| checker).
$Element->{$HTML_NS}{link} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $attrs_checker = $GetHTMLAttrsChecker->({
      href => $HTMLURIAttrChecker,
      ## The link type checker takes two leading arguments (0 and the
      ## current item) in front of the usual attribute checker arguments.
      rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
      ## NOTE: Though |title| has special semantics, it is syntactically
      ## the same as the |title| global attribute.
    });
    $attrs_checker->($self, $item, $element_state);

    my $node = $item->{node};
    if ($node->has_attribute_ns (undef, 'href')) {
      $self->{has_hyperlink_element} = 1
          if $item->{has_hyperlink_link_type};
    } else {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:href');
    }
    if (not $node->has_attribute_ns (undef, 'rel')) {
      $self->{onerror}->(node => $node,
                         type => 'attribute missing:rel');
    }
  },
};
961    
## |meta|: an empty element; content callbacks come from
## |%HTMLEmptyChecker|.
##
## |check_attrs| does three things:
##   1. Runs the per-attribute checkers.  |content|, |name|, |http-equiv|
##      and |charset| are only collected here and validated below.
##   2. Checks the allowed combinations of those four attributes: exactly
##      one of |name|, |http-equiv| and |charset| selects the kind of
##      |meta|; |content| is required with |name| and |http-equiv| and
##      forbidden otherwise.
##   3. For |http-equiv="content-type"| and for |charset|, checks the
##      position of the element and the declared charset name.
$Element->{$HTML_NS}->{meta} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLEmptyChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $name_attr;
    my $http_equiv_attr;
    my $charset_attr;
    my $content_attr;
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      ## |$checker| is a code reference (run it), the plain value 1 (the
      ## attribute is known and handled later in this sub) or false (the
      ## attribute is unknown).
      my $checker;
      if ($attr_ns eq '') {
        if ($attr_ln eq 'content') {
          $content_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'name') {
          $name_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'http-equiv') {
          $http_equiv_attr = $attr;
          $checker = 1;
        } elsif ($attr_ln eq 'charset') {
          $charset_attr = $attr;
          $checker = 1;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{$attr_ln}
              || $AttrChecker->{$attr_ns}->{''};
        }
      } else {
        $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
            || $AttrChecker->{$attr_ns}->{''};
      }
      if ($checker) {
        $checker->($self, $attr) if ref $checker;
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the
        ## spec
      }
    }

    if (defined $name_attr) {
      ## |name| excludes both |http-equiv| and |charset|.  Each offending
      ## attribute is reported on its own, so that a |charset| is not
      ## silently accepted just because |http-equiv| is present as well.
      if (defined $http_equiv_attr) {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'attribute not allowed');
      }
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
      ## TODO: metadata conformance (the values of |name| and |content|
      ## are not examined yet).
    } elsif (defined $http_equiv_attr) {
      if (defined $charset_attr) {
        $self->{onerror}->(node => $charset_attr,
                           type => 'attribute not allowed');
      }
      unless (defined $content_attr) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:content');
      }
    } elsif (defined $charset_attr) {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
      }
    } else {
      if (defined $content_attr) {
        $self->{onerror}->(node => $content_attr,
                           type => 'attribute not allowed');
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv');
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:name|http-equiv|charset');
      }
    }

    ## Checks where a character encoding declaration appears: it has to
    ## be the first child element of the document's |head|, and it must
    ## not occur in a non-HTML document.
    my $check_charset_decl = sub {
      my $parent = $item->{node}->manakai_parent_element;
      if ($parent and $parent eq $parent->owner_document->manakai_head) {
        for my $el (@{$parent->child_nodes}) {
          next unless $el->node_type == 1; # ELEMENT_NODE
          unless ($el eq $item->{node}) {
            ## NOTE: Not the first child element.
            $self->{onerror}->(node => $item->{node},
                               type => 'element not allowed:meta charset',
                               level => $self->{must_level});
          }
          ## Only the first child element is of interest.
          last;
          ## NOTE: Entity references are not supported.
        }
      } else {
        $self->{onerror}->(node => $item->{node},
                           type => 'element not allowed:meta charset',
                           level => $self->{must_level});
      }

      unless ($item->{node}->owner_document->manakai_is_html) {
        $self->{onerror}->(node => $item->{node},
                           type => 'in XML:charset',
                           level => $self->{must_level});
      }
    }; # $check_charset_decl

    ## Checks a declared charset name.  |$attr| is the attribute node the
    ## errors are attached to; |$charset_value| is the charset name taken
    ## from it.
    my $check_charset = sub {
      my ($attr, $charset_value) = @_;
      ## NOTE: Though the case-sensitivity of the |charset| attribute
      ## value is not explicitly spelled out in the HTML5 spec, the
      ## Character Set registry of IANA, which is referenced from the
      ## HTML5 spec, says that charset names are case-insensitive.
      $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII case-insensitive.

      require Message::Charset::Info;
      my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
      my $ic = $item->{node}->owner_document->input_encoding;
      if (defined $ic) {
        ## TODO: Test for this case
        my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
        ## NOTE(review): Both lookups can yield undef for names missing
        ## from the table, in which case two different unknown names
        ## compare as equal here -- confirm whether that is intended.
        if ($charset ne $ic_charset) {
          $self->{onerror}->(node => $attr,
                             type => 'mismatched charset name:'.$ic.
                                 ':'.$charset_value, ## TODO: This should be a |value| value.
                             level => $self->{must_level});
        }
      } else {
        ## NOTE: MUST, but not checkable, since the document is not
        ## originally in serialized form (or the parser does not preserve
        ## the input encoding information).
        $self->{onerror}->(node => $attr,
                           type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
                           level => 'unsupported');
      }

      ## ISSUE: What is "valid character encoding name"?  Syntactically
      ## valid?  Syntactically valid and registered?  What about x-charset
      ## names?
      unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
                  ($charset_value)) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:syntax error:'.$charset_value, ## TODO
                           level => $self->{must_level});
      }

      if ($charset) {
        ## ISSUE: What is "the preferred name for that encoding" (for a
        ## charset with no "preferred MIME name" label)?
        my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
        if (($charset_status &
             Message::Charset::Info::PREFERRED_CHARSET_NAME ())
                != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
          $self->{onerror}->(node => $attr,
                             type => 'charset:not preferred:'.
                                 $charset_value, ## TODO
                             level => $self->{must_level});
        }
        if (($charset_status &
             Message::Charset::Info::REGISTERED_CHARSET_NAME ())
                != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
          if ($charset_value =~ /^x-/) {
            $self->{onerror}->(node => $attr,
                               type => 'charset:private:'.$charset_value, ## TODO
                               level => $self->{good_level});
          } else {
            $self->{onerror}->(node => $attr,
                               type => 'charset:not registered:'.
                                   $charset_value, ## TODO
                               level => $self->{good_level});
          }
        }
      } elsif ($charset_value =~ /^x-/) {
        $self->{onerror}->(node => $attr,
                           type => 'charset:private:'.$charset_value, ## TODO
                           level => $self->{good_level});
      } else {
        $self->{onerror}->(node => $attr,
                           type => 'charset:not registered:'.$charset_value, ## TODO
                           level => $self->{good_level});
      }

      if ($attr->get_user_data ('manakai_has_reference')) {
        $self->{onerror}->(node => $attr,
                           type => 'character reference in charset',
                           level => $self->{must_level});
      }
    }; # $check_charset

    ## TODO: pragma conformance
    if (defined $http_equiv_attr) { ## An enumerated attribute
      my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
      if ({
        'refresh' => 1,
        'default-style' => 1,
      }->{$keyword}) {
        #

        ## TODO: More than one occurrence is a MUST-level error (revision
        ## 1180).
      } elsif ($keyword eq 'content-type') {
        ## ISSUE: Though it is renamed as "Encoding declaration" state in
        ## rev 1221, there are still many occurrences of "Content-Type"
        ## state in the spec.

        $check_charset_decl->();
        if ($content_attr) {
          my $content = $content_attr->value;
          if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
            $check_charset->($content_attr, $1);
          } else {
            $self->{onerror}->(node => $content_attr,
                               type => 'meta content-type syntax error',
                               level => $self->{must_level});
          }
        }
      } else {
        $self->{onerror}->(node => $http_equiv_attr,
                           type => 'enumerated:invalid');
      }
    }

    if (defined $charset_attr) {
      $check_charset_decl->();
      $check_charset->($charset_attr, $charset_attr->value);
    }
  },
};
1198    
## |style|: an embedded style sheet.
##
## |check_start| classifies the element by its |type| attribute.  With no
## |type|, or a |type| matching |text/css| (ASCII case-insensitively, with
## optional whitespace around the tokens), child elements are rejected and
## the collected text is handed to |$self->{onsubdoc}| at the end.  For
## any other type, child elements are tolerated and the element is
## reported as unsupported at the end.
$Element->{$HTML_NS}{style} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
    media => $HTMLMQAttrChecker,
    scoped => $GetHTMLBooleanAttrChecker->('scoped'),
    ## NOTE: |title| has special semantics for |style|s, but is
    ## syntactically not different
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    ## NOTE: |html:style| itself has no conformance criteria on its
    ## content model.
    my $type = $item->{node}->get_attribute_ns (undef, 'type');
    my $is_css = (not defined $type or
        $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]);

    ## |allow_element| is 1 for an unknown style language.
    ## TODO: $type normalization
    @$element_state{qw/allow_element style_type/}
        = $is_css ? (0, 'text/css') : (1, $type);
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Listed in |plus_elements|: accepted.
    } elsif ($element_state->{allow_element}) {
      ## Unknown style language: child elements cannot be judged.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Accumulate the style sheet source for |check_end|.
    $element_state->{text} .= $child_node->text_content;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    my $node = $item->{node};
    if ($element_state->{style_type} eq 'text/css') {
      ## NOTE(review): |s| is undef when the element had no text child;
      ## confirm that the |onsubdoc| handler copes with that.
      my %subdoc = (
        s => $element_state->{text},
        container_node => $node,
        media_type => 'text/css',
        is_char_string => 1,
      );
      $self->{onsubdoc}->(\%subdoc);
    } else {
      $self->{onerror}->(node => $node, level => 'unsupported',
                         type => 'style:'.$element_state->{style_type});
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1256 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1257 wakaba 1.1
## |body| and |blockquote| use the prose content callbacks and are marked
## with both the HTML5 and the HTML4 status.
$Element->{$HTML_NS}{body} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLProseContentChecker,
};

$Element->{$HTML_NS}{blockquote} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLProseContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    cite => $HTMLURIAttrChecker,
  }),
};

## |section|, |nav|, |article| and |aside| share one definition: prose
## content callbacks with the HTML5 status only.  Each element gets a
## hash of its own.
for my $local_name (qw/section nav article aside/) {
  $Element->{$HTML_NS}{$local_name} = {
    status => FEATURE_HTML5_DEFAULT,
    %HTMLProseContentChecker,
  };
}
1290    
## |h1|: phrasing content.  Starting a heading raises the
## |$self->{flag}->{has_hn}| flag, which the |header| checker inspects.
$Element->{$HTML_NS}{h1} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLPhrasingContentChecker,
  check_start => sub {
    my ($self) = @_;
    $self->{flag}{has_hn} = 1;
  },
};

## |h2| to |h6| are shallow copies of the |h1| definition.
for my $level (2..6) {
  $Element->{$HTML_NS}{'h' . $level} = {%{$Element->{$HTML_NS}{h1}}};
}
1309 wakaba 1.1
1310 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1311    
## |header|: prose content that excludes |header|, |footer| and the
## elements listed in |$HTMLSectioningContent|, and that has to contain a
## heading.
##
## The heading requirement is tracked with |$self->{flag}->{has_hn}|:
## the flag is cleared on entry (the previous value is kept in
## |$element_state->{has_hn_original}|) and inspected on exit.
$Element->{$HTML_NS}{header} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {qw/header 1 footer 1/}},
                                $HTMLSectioningContent);
    my $flag = $self->{flag};
    $element_state->{has_hn_original} = $flag->{has_hn};
    $flag->{has_hn} = 0;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);
    my $flag = $self->{flag};
    if (not $flag->{has_hn}) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:hn');
    }
    ## The flag stays set if either this element or its context has seen
    ## a heading.
    $flag->{has_hn} ||= $element_state->{has_hn_original};

    $HTMLProseContentChecker{check_end}->(@_);
  },
  ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
};
1336    
## |footer|: prose content that excludes |footer| itself and the elements
## listed in |$HTMLSectioningContent| and |$HTMLHeadingContent|.  The
## exclusions are registered on entry and removed again on exit.
$Element->{$HTML_NS}{footer} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded = {$HTML_NS => {footer => 1}};
    $self->_add_minus_elements ($element_state, $excluded,
                                $HTMLSectioningContent,
                                $HTMLHeadingContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1353    
## |address|: prose content that excludes |footer|, |address| itself and
## the elements listed in |$HTMLSectioningContent| and
## |$HTMLHeadingContent|.  The exclusions are registered on entry and
## removed again on exit.
$Element->{$HTML_NS}{address} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLProseContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $excluded = {$HTML_NS => {footer => 1, address => 1}};
    $self->_add_minus_elements ($element_state, $excluded,
                                $HTMLSectioningContent,
                                $HTMLHeadingContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLProseContentChecker{check_end}->(@_);
  },
};
1370    
## |p|: callbacks come from |%HTMLPhrasingContentChecker|.
$Element->{$HTML_NS}{p} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLPhrasingContentChecker,
};

## |hr|: callbacks come from |%HTMLEmptyChecker|.
$Element->{$HTML_NS}{hr} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLEmptyChecker,
};

## |br|: callbacks come from |%HTMLEmptyChecker|.
$Element->{$HTML_NS}{br} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLEmptyChecker,
  ## NOTE: Blank lines MUST NOT be used for presentation purposes.
  ## (This requirement is semantic, so it cannot be checked here.)
};
1387    
## |dialog|: a sequence of |dt|/|dd| pairs.
##
## |$element_state->{phase}| says which child is expected next: 'before
## dt' or 'before dd'.  A child of the other kind is reported as having a
## missing partner, and the phase is set as if the partner had been
## present.  Significant text is not allowed.
$Element->{$HTML_NS}{dialog} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLChecker,
  check_start => sub {
    my (undef, undef, $state) = @_;
    $state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## True if the child is the HTML element with the given local name.
    my $is_html_child = sub {
      $child_nsuri eq $HTML_NS and $child_ln eq $_[0];
    };
    ## Reports the child as not allowed at this position.
    my $not_allowed = sub {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    };

    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Listed in |plus_elements|: accepted without touching the phase.
    } elsif ($element_state->{phase} eq 'before dt') {
      if ($is_html_child->('dt')) {
        $element_state->{phase} = 'before dd';
      } elsif ($is_html_child->('dd')) {
        $self->{onerror}
            ->(node => $child_el, type => 'ps element missing:dt');
        $element_state->{phase} = 'before dt';
      } else {
        $not_allowed->();
      }
    } elsif ($element_state->{phase} eq 'before dd') {
      if ($is_html_child->('dd')) {
        $element_state->{phase} = 'before dt';
      } elsif ($is_html_child->('dt')) {
        $self->{onerror}
            ->(node => $child_el, type => 'ps element missing:dd');
        $element_state->{phase} = 'before dd';
      } else {
        $not_allowed->();
      }
    } else {
      die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## A trailing |dt| has no |dd| to go with it.
    if ($element_state->{phase} eq 'before dd') {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:dd');
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1444    
## |pre|: callbacks come from |%HTMLPhrasingContentChecker|.
$Element->{$HTML_NS}{pre} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLPhrasingContentChecker,
};
1449    
## |ol|: only |li| children are allowed, and no significant text.  The
## |start| attribute is checked as an integer.
$Element->{$HTML_NS}{ol} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    start => $HTMLIntegerAttrChecker,
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Listed in |plus_elements|: accepted.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
      ## A list item: the expected child.
    } else {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
};
1478    
## |ul|: shares the content model callbacks of |ol| (only |li| children,
## no significant text).
##
## The attribute checker is NOT shared: |ol| accepts a |start| attribute,
## which |ul| does not have, so |ul| gets a checker with no
## element-specific attributes.  The overriding keys come after the copy
## of the |ol| definition so that they take precedence.
$Element->{$HTML_NS}->{ul} = {
  %{$Element->{$HTML_NS}->{ol}},
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => $GetHTMLAttrsChecker->({}),
};
1483    
## |li|: a list item.
##
## The content model depends on the context: while
## |$self->{flag}->{in_menu}| is set, children are checked with the
## phrasing content callbacks, otherwise with the prose content ones.
##
## The |start| attribute is checked as an integer; in addition it is
## reported as unsupported when the parent element exists and is not an
## HTML |ol|.
$Element->{$HTML_NS}{li} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLProseContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    start => sub {
      my ($self, $attr) = @_;
      my $list = $attr->owner_element->manakai_parent_element;
      if (defined $list) {
        my $list_ns = $list->namespace_uri;
        $list_ns = '' if not defined $list_ns;
        my $list_ln = $list->manakai_local_name;
        my $in_ol = ($list_ns eq $HTML_NS and $list_ln eq 'ol');
        if (not $in_ol) {
          $self->{onerror}->(node => $attr, level => 'unsupported',
                             type => 'attribute');
        }
      }
      $HTMLIntegerAttrChecker->($self, $attr);
    },
  }),
  check_child_element => sub {
    my ($self) = @_;
    my $checker = $self->{flag}{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
    $checker->{check_child_element}->(@_);
  },
  check_child_text => sub {
    my ($self) = @_;
    my $checker = $self->{flag}{in_menu}
        ? \%HTMLPhrasingContentChecker : \%HTMLProseContentChecker;
    $checker->{check_child_text}->(@_);
  },
};
1521    
## |dl|: groups of one or more |dt| followed by one or more |dd|.
##
## |$element_state->{phase}| is 'before dt' until the first child, then
## 'in dts' after a |dt| and 'in dds' after a |dd|.  A |dd| with no
## preceding |dt| and a trailing |dt| with no |dd| are reported.
## Significant text is not allowed.
$Element->{$HTML_NS}{dl} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLChecker,
  check_start => sub {
    my (undef, undef, $state) = @_;
    $state->{phase} = 'before dt';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## True if the child is the HTML element with the given local name.
    my $is_html_child = sub {
      $child_nsuri eq $HTML_NS and $child_ln eq $_[0];
    };
    ## Reports the child as not allowed at this position.
    my $not_allowed = sub {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    };

    if ($self->{minus_elements}{$child_nsuri}{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}{$child_nsuri}{$child_ln}) {
      ## Listed in |plus_elements|: accepted without touching the phase.
    } elsif ($element_state->{phase} eq 'in dds') {
      if ($is_html_child->('dd')) {
        ## Another |dd| of the same group; the phase stays 'in dds'.
      } elsif ($is_html_child->('dt')) {
        $element_state->{phase} = 'in dts';
      } else {
        $not_allowed->();
      }
    } elsif ($element_state->{phase} eq 'in dts') {
      if ($is_html_child->('dt')) {
        ## Another |dt| of the same group; the phase stays 'in dts'.
      } elsif ($is_html_child->('dd')) {
        $element_state->{phase} = 'in dds';
      } else {
        $not_allowed->();
      }
    } elsif ($element_state->{phase} eq 'before dt') {
      if ($is_html_child->('dt')) {
        $element_state->{phase} = 'in dts';
      } elsif ($is_html_child->('dd')) {
        ## The list starts with a |dd|.
        $self->{onerror}
            ->(node => $child_el, type => 'ps element missing:dt');
        $element_state->{phase} = 'in dds';
      } else {
        $not_allowed->();
      }
    } else {
      die "check_child_element: Bad |dl| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $self->{onerror}->(node => $child_node, type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## The last group has |dt|s but no |dd|.
    if ($element_state->{phase} eq 'in dts') {
      $self->{onerror}->(node => $item->{node},
                         type => 'child element missing:dd');
    }

    $HTMLChecker{check_end}->(@_);
  },
};
1584    
## |dt| uses the phrasing content checker as is.
$Element->{$HTML_NS}{dt} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLPhrasingContentChecker,
};

## |dd| uses the prose content checker as is.
$Element->{$HTML_NS}{dd} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLProseContentChecker,
};
1594    
## |a|: phrasing content.  |check_start| registers |$HTMLInteractiveContent|
## as minus elements for the duration of the element; |check_attrs| validates
## the hyperlink attributes and maintains |$self->{flag}->{in_a_href}|, which
## the |img| |ismap| checker consults.
$Element->{$HTML_NS}->{a} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my %attr; ## Element-specific attributes actually present, by local name
    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        $checker = {
          target => $HTMLTargetAttrChecker,
          href => $HTMLURIAttrChecker,
          ping => $HTMLSpaceURIsAttrChecker,
          rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
          media => $HTMLMQAttrChecker,
          hreflang => $HTMLLanguageTagAttrChecker,
          type => $HTMLIMTAttrChecker,
        }->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      ## Fall back to the per-namespace checker, then to the namespace-wide
      ## default registered under the empty local name.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      if ($checker) {
        $checker->($self, $attr) if ref $checker;
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }
    }

    ## Remember the flag as it was outside this element so that
    ## |check_end| can put it back.
    $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      $self->{flag}->{in_a_href} = 1;
    } else {
      ## Without |href| the other hyperlink attributes are not allowed.
      for (qw/target ping rel media hreflang type/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed');
        }
      }
    }
  },
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    ## Restore |in_a_href| to its value outside this element.  Without
    ## this the flag stays set after the first |<a href>| and anything
    ## that tests it later (e.g. |img| |ismap|) believes it is still
    ## inside a hyperlink.
    delete $self->{flag}->{in_a_href}
        unless $element_state->{in_a_href_original};

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
1657    
## |q|: phrasing content; the only element-specific attribute is |cite|,
## checked by the URI attribute checker.
$Element->{$HTML_NS}{q} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({cite => $HTMLURIAttrChecker}),
};
1665    
## Phrasing-content elements with no element-specific checks.  Each element
## gets its own hash.
for my $ln (qw/cite em strong small/) {
  $Element->{$HTML_NS}->{$ln} = {
    status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
    %HTMLPhrasingContentChecker,
  };
}

## |mark| carries the HTML5 status only.
$Element->{$HTML_NS}{mark} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
};
1690    
## |dfn|: phrasing content that must not contain another |dfn|.
## |check_start| also computes the term being defined and records it in
## |$self->{term}|, reporting a 'duplicate term' error if the same term has
## already been defined.
$Element->{$HTML_NS}->{dfn} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLPhrasingContentChecker,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});

    ## The term is, in order of preference:
    ##   1. the |title| attribute of the |dfn|;
    ##   2. the |title| of its |abbr| child, if that is the ONLY element
    ##      child and there is no text besides inter-element whitespace;
    ##   3. the text content of the |dfn|.
    my $node = $item->{node};
    my $term = $node->get_attribute_ns (undef, 'title');
    unless (defined $term) {
      my $has_element_child;
      for my $child (@{$node->child_nodes}) {
        my $node_type = $child->node_type;
        if ($node_type == 1) { # ELEMENT_NODE
          if ($has_element_child) {
            ## More than one element child: rule 2 does not apply.
            undef $term;
            last;
          }
          $has_element_child = 1;

          ## The single element child has to be an HTML |abbr| with
          ## a |title|; any other element disqualifies rule 2
          ## immediately, so a later |abbr| sibling cannot be picked up.
          last unless $child->manakai_local_name eq 'abbr';
          my $nsuri = $child->namespace_uri;
          last unless defined $nsuri and $nsuri eq $HTML_NS;
          my $attr = $child->get_attribute_node_ns (undef, 'title');
          last unless $attr;
          $term = $attr->value;
        } elsif ($node_type == 3 or $node_type == 4) {
          ## TEXT_NODE or CDATA_SECTION_NODE
          if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
            next;
          }
          undef $term;
          last;
        }
      }
      unless (defined $term) {
        $term = $node->text_content;
      }
    }
    if ($self->{term}->{$term}) {
      $self->{onerror}->(node => $node, type => 'duplicate term');
      push @{$self->{term}->{$term}}, $node;
    } else {
      $self->{term}->{$term} = [$node];
    }
    ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
    ## has |title|.
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
1744    
## |abbr| uses the phrasing content checker as is.
$Element->{$HTML_NS}{abbr} = {
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  %HTMLPhrasingContentChecker,
};
1749    
## |time|: phrasing content whose date or time is given either by the
## |datetime| attribute or, when that attribute is absent, by the text
## content of the element.  The value itself is validated in |check_end|.
$Element->{$HTML_NS}->{time} = {
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    datetime => sub { 1 }, # value is validated in |check_end|
  }),
  ## TODO: Write tests
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Choose the string to parse, the node errors are reported on, and
    ## the pattern for skippable whitespace, which depends on the source:
    ## space characters for the attribute, Zs characters for content.
    my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
    my $input;
    my $reg_sp;
    my $input_node;
    if ($attr) {
      $input = $attr->value;
      $reg_sp = qr/[\x09-\x0D\x20]*/;
      $input_node = $attr;
    } else {
      $input = $item->{node}->text_content;
      $reg_sp = qr/\p{Zs}*/;
      $input_node = $item->{node};

      ## ISSUE: What is the definition for "successfully extracts a date
      ## or time"?  If the algorithm says the string is invalid but
      ## return some date or time, is it "successfully"?
    }

    my $hour;
    my $minute;
    my $second;
    ## Accepts either |YYYY-MM-DD T? hh:mm(:ss)? (Z | [+-]hh:mm)?| or
    ## |hh:mm(:ss)?|.  Field widths are deliberately loose here; they are
    ## checked below so that a specific error can be reported.
    ## |$reg_sp| is interpolated so that the whitespace actually skipped
    ## matches the mode selected above.
    if ($input =~ /
      \A
      $reg_sp
      ([0-9]+) # 1
      (?>
        -([0-9]+) # 2
        -([0-9]+) # 3
        $reg_sp
        (?>
          T
          $reg_sp
        )?
        ([0-9]+) # 4
        :([0-9]+) # 5
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
        )?
        $reg_sp
        (?>
          Z
          $reg_sp
        |
          [+-]([0-9]+):([0-9]+) # 7, 8
          $reg_sp
        )?
        \z
      |
        :([0-9]+) # 9
        (?>
          :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
        )?
        $reg_sp\z
      )
    /x) {
      if (defined $2) { ## YYYY-MM-DD T? hh:mm
        if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
            length $4 != 2 or length $5 != 2) {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        if (1 <= $2 and $2 <= 12) {
          ## Day must fit the month; February 29 additionally needs
          ## a leap year.
          $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
              if $3 < 1 or
                  $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
          $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
              if $2 == 2 and $3 == 29 and
                  not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
        } else {
          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad month');
        }

        ($hour, $minute, $second) = ($4, $5, $6);

        if (defined $7) { ## [+-]hh:mm
          if (length $7 != 2 or length $8 != 2) {
            $self->{onerror}->(node => $input_node,
                               type => 'dateortime:syntax error');
          }

          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad timezone hour')
              if $7 > 23;
          $self->{onerror}->(node => $input_node,
                             type => 'datetime:bad timezone minute')
              if $8 > 59;
        }
      } else { ## hh:mm
        if (length $1 != 2 or length $9 != 2) {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        ($hour, $minute, $second) = ($1, $9, $10);
      }

      $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
          if $hour > 23;
      $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
          if $minute > 59;

      if (defined $second) { ## s
        ## NOTE: Integer part of second don't have to have length of two.

        ## The pattern tolerates a bare fraction (".5") so that it can be
        ## reported here as a syntax error instead of failing the match.
        if (substr ($second, 0, 1) eq '.') {
          $self->{onerror}->(node => $input_node,
                             type => 'dateortime:syntax error');
        }

        $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
            if $second >= 60;
      }
    } else {
      $self->{onerror}->(node => $input_node,
                         type => 'dateortime:syntax error');
    }

    $HTMLPhrasingContentChecker{check_end}->(@_);
  },
};
1882    
## |meter|: phrasing content with six floating-point number attributes.
## Every attribute gets its own checker instance; the callback handed to
## the generator always returns true.
$Element->{$HTML_NS}{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    map { ($_ => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 })) }
        qw/value min low high max optimum/
  }),
};
1895    
## |progress|: phrasing content.  The callbacks handed to the generator
## test |value| against ">= 0" and |max| against "> 0".
$Element->{$HTML_NS}{progress} = { ## TODO: recommended to use content
  status => FEATURE_HTML5_DEFAULT,
  %HTMLPhrasingContentChecker,
  check_attrs => $GetHTMLAttrsChecker->({
    value => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] >= 0 }),
    max => $GetHTMLFloatingPointNumberAttrChecker->(sub { $_[0] > 0 }),
  }),
};
1904    
## Phrasing-content elements with no element-specific checks.  Each element
## gets its own hash.  In a hash constructor a later key overrides an
## earlier one, so the position of |status| relative to the mixin is kept
## distinct for the two groups below.
for my $ln (qw/code var samp kbd sub sup span/) {
  $Element->{$HTML_NS}->{$ln} = {
    status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
    %HTMLPhrasingContentChecker,
  };
}

for my $ln (qw/i b/) {
  $Element->{$HTML_NS}->{$ln} = {
    %HTMLPhrasingContentChecker,
    status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  };
}
1949    
## |bdo|: phrasing content; the |dir| attribute is required.
$Element->{$HTML_NS}{bdo} = {
  %HTMLPhrasingContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    ## No element-specific attributes; run the generic attribute checks.
    my $generic_checker = $GetHTMLAttrsChecker->({});
    $generic_checker->($self, $item, $element_state);

    my $el = $item->{node};
    $self->{onerror}->(node => $el, type => 'attribute missing:dir')
        unless $el->has_attribute_ns (undef, 'dir');
  },
  ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
};
1963    
1964 wakaba 1.29 =pod
1965    
1966     ## TODO:
1967    
1968     +
1969     + <p>Partly because of the confusion described above, authors are
1970     + strongly recommended to always mark up all paragraphs with the
1971     + <code>p</code> element, and to not have any <code>ins</code> or
1972     + <code>del</code> elements that cross across any <span
1973     + title="paragraph">implied paragraphs</span>.</p>
1974     +
1975     (An informative note)
1976    
1977     <p><code>ins</code> elements should not cross <span
1978     + title="paragraph">implied paragraph</span> boundaries.</p>
1979     (normative)
1980    
1981     + <p><code>del</code> elements should not cross <span
1982     + title="paragraph">implied paragraph</span> boundaries.</p>
1983     (normative)
1984    
1985     =cut
1986    
## |ins|: transparent content model; |cite| is checked as a URI and
## |datetime| by the datetime attribute checker.
$Element->{$HTML_NS}{ins} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cite     => $HTMLURIAttrChecker,
    datetime => $HTMLDatetimeAttrChecker,
  }),
};
1995    
## |del|: same attributes as |ins|, but with its own |check_end|.
$Element->{$HTML_NS}{del} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    cite     => $HTMLURIAttrChecker,
    datetime => $HTMLDatetimeAttrChecker,
  }),
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## NOTE: Significantness flag does not propagate.
    ## Warn only when the element has no significant content of its own
    ## and the item is not marked transparent.
    unless ($element_state->{has_significant} or $item->{transparent}) {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
};
2016    
## |figure|: prose content plus exactly one |legend|, which must be either
## the first or the last child.
##
## State kept in |$element_state|:
##   has_legend_at_first - true once a |legend| was seen before any other
##                         content.
##   has_legend          - the most recent |legend| element seen after
##                         other content (candidate for "legend at end").
##   has_non_legend      - true if non-|legend| content has been seen since
##                         the last |legend| (it is cleared on every
##                         |legend|).
$Element->{$HTML_NS}->{figure} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_FD,
  ## NOTE: legend, Prose | Prose, legend
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## A forbidden element still counts as non-|legend| content.
      $element_state->{has_non_legend} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted, no state change.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      if ($element_state->{has_legend_at_first}) {
        ## A leading |legend| already exists; this one is extra.
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
      } elsif ($element_state->{has_legend}) {
        ## A later |legend| supersedes the previous candidate, so the
        ## previous one is the element reported.
        $self->{onerror}->(node => $element_state->{has_legend},
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
        $element_state->{has_legend} = $child_el;
      } elsif ($element_state->{has_non_legend}) {
        ## First |legend|, after other content: candidate trailing legend.
        $element_state->{has_legend} = $child_el;
      } else {
        ## First |legend|, before any other content.
        $element_state->{has_legend_at_first} = 1;
      }
      delete $element_state->{has_non_legend};
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only significant text counts as non-|legend| content.
    if ($has_significant) {
      $element_state->{has_non_legend} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    if ($element_state->{has_legend_at_first}) {
      ## Leading |legend|: nothing more to check here.
    } elsif ($element_state->{has_legend}) {
      ## The candidate trailing |legend| is misplaced if content followed it.
      if ($element_state->{has_non_legend}) {
        $self->{onerror}->(node => $element_state->{has_legend},
                           type => 'element not allowed:figure legend',
                           level => $self->{must_level});
      }
    } else {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:legend',
                         level => $self->{must_level});
    }

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
  },
};
2079 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2080 wakaba 1.1
## |img|: empty element.  |src| is required; a missing |alt| is reported at
## the "should" level.  |ismap| is only allowed while the |in_a_href| flag
## is set.
$Element->{$HTML_NS}{img} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $attrs_checker = $GetHTMLAttrsChecker->({
      alt => sub { }, ## NOTE: No syntactical requirement
      src => $HTMLURIAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ismap => sub {
        my ($self, $attr, $parent_item) = @_;
        $self->{onerror}->(node => $attr,
                           type => 'attribute not allowed:ismap')
            unless $self->{flag}->{in_a_href};
        $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
      },
      ## TODO: height
      ## TODO: width
    });
    $attrs_checker->($self, $item);

    my $el = $item->{node};
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:alt',
                       level => $self->{should_level})
        unless $el->has_attribute_ns (undef, 'alt');
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:src')
        unless $el->has_attribute_ns (undef, 'src');
  },
};
2112    
## |iframe|: uses the text checker; |src| is checked as a URI.
$Element->{$HTML_NS}{iframe} = {
  %HTMLTextChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  ## NOTE: Not part of HTML4 Strict
  check_attrs => $GetHTMLAttrsChecker->({src => $HTMLURIAttrChecker}),
};
2121    
## |embed|: empty element.  |src| is required; |src| and |type| have
## dedicated checkers and any other attribute in no namespace is accepted.
$Element->{$HTML_NS}{embed} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_src;
    for my $attr (@{$item->{node}->attributes}) {
      my $ns = $attr->namespace_uri;
      my $attr_ns = defined $ns ? $ns : '';
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        $has_src = 1 if $attr_ln eq 'src';
        ## TODO: height
        ## TODO: width
        $checker
            = $attr_ln eq 'src'  ? $HTMLURIAttrChecker
            : $attr_ln eq 'type' ? $HTMLIMTAttrChecker
            : ($HTMLAttrChecker->{$attr_ln}
               || sub { }); ## NOTE: Any local attribute is ok.
      }
      ## Namespaced attributes: exact checker first, then the
      ## namespace-wide default registered under the empty local name.
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      unless ($checker) {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for global attributes in the spec
        next;
      }
      $checker->($self, $attr);
    }

    $self->{onerror}->(node => $item->{node},
                       type => 'attribute missing:src')
        unless $has_src;
  },
};
2163    
## |object|: zero or more |param| elements followed by prose content.
## At least one of |data| and |type| must be present.
##
## |$element_state->{has_non_param}| becomes true once anything other than
## a |param| has been seen; a |param| after that point is an error.
$Element->{$HTML_NS}->{object} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    $GetHTMLAttrsChecker->({
      data => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      usemap => $HTMLUsemapAttrChecker,
      ## TODO: width
      ## TODO: height
    })->($self, $item);
    unless ($item->{node}->has_attribute_ns (undef, 'data')) {
      unless ($item->{node}->has_attribute_ns (undef, 'type')) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:data|type');
      }
    }
  },
  ## NOTE: param*, transparent (Prose)
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## A forbidden element still ends the leading run of |param|s.
      ## (This branch used to set |has_non_legend|, a key that only the
      ## |figure| checker reads.)
      $element_state->{has_non_param} = 1;
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted, no state change.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
      if ($element_state->{has_non_param}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
    } else {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_param} = 1;
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text also ends the leading run of |param|s.
    if ($has_significant) {
      $element_state->{has_non_param} = 1;
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    if ($element_state->{has_significant}) {
      ## Propagate significance to the real parent's state.
      $item->{real_parent_state}->{has_significant} = 1;
    } elsif ($item->{node}->manakai_parent_element) {
      ## NOTE: Transparent.
    } else {
      $self->{onerror}->(node => $item->{node},
                         level => $self->{should_level},
                         type => 'no significant content');
    }
  },
  ## TODO: Tests for <nest/> in <object/>
};
2225 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
2226     ## What about |<section><object data><style scoped></style>x</object></section>|?
2227     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
2228 wakaba 1.1
## |param|: empty element; both |name| and |value| are required and
## neither has a syntactic constraint.
$Element->{$HTML_NS}{param} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $attrs_checker = $GetHTMLAttrsChecker->({
      name => sub { },
      value => sub { },
    });
    $attrs_checker->($self, $item);

    my $el = $item->{node};
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:name')
        unless $el->has_attribute_ns (undef, 'name');
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:value')
        unless $el->has_attribute_ns (undef, 'value');
  },
};
2248    
## |video|: when there is no |src| attribute, |source| children are
## accepted until other content is seen and at least one |source| is
## required; with |src|, any |source| child is reported.
##
## State kept in |$element_state|:
##   allow_source - true while a |source| child is still acceptable: no
##                  |src| attribute and no non-|source| content seen yet.
##   has_source   - 1 once a |source| child was seen; -1 when one is
##                  required (no |src|) but not yet seen; false otherwise.
$Element->{$HTML_NS}->{video} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
    poster => $HTMLURIAttrChecker, ## TODO: not for audio!
    ## TODO: width, height
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{allow_source}
        = not $item->{node}->has_attribute_ns (undef, 'src');
    ## Yields -1 when |source| is allowed and a false value otherwise;
    ## |||=| keeps a value that is already true.
    $element_state->{has_source} ||= $element_state->{allow_source} * -1;
      ## NOTE: It might be set true by |check_element|.
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      ## A forbidden element still ends the leading run of |source|s.
      delete $element_state->{allow_source};
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      ## Listed in |plus_elements|: accepted, no state change.
    } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
      unless ($element_state->{allow_source}) {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed:prose',
                           level => $self->{must_level});
      }
      ## Recorded even when misplaced, so that |check_end| does not also
      ## report a missing |source|.
      $element_state->{has_source} = 1;
    } else {
      delete $element_state->{allow_source};
      $HTMLProseContentChecker{check_child_element}->(@_);
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Significant text ends the leading run of |source|s.
    if ($has_significant) {
      delete $element_state->{allow_source};
    }
    $HTMLProseContentChecker{check_child_text}->(@_);
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    ## Still -1: a |source| was required but none was found.
    if ($element_state->{has_source} == -1) {
      $self->{onerror}->(node => $item->{node},
                         type => 'element missing:source',
                         level => $self->{must_level});
    }

    ## The end-of-element check is shared with |object|.
    $Element->{$HTML_NS}->{object}->{check_end}->(@_);
  },
};
2309    
## |audio|: starts from a copy of the |video| definition and replaces
## |status| and |check_attrs|; the attribute table has no |poster|.
$Element->{$HTML_NS}{audio} = {
  %{$Element->{$HTML_NS}{video}},
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    ## TODO: start, loopstart, loopend, end
    ## ISSUE: they MUST be "value time offset"s.  Value?
    ## ISSUE: playcount has no conformance criteria
    autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
    controls => $GetHTMLBooleanAttrChecker->('controls'),
  }),
};
2322    
## |source|: empty element; |src| is required.
$Element->{$HTML_NS}{source} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $attrs_checker = $GetHTMLAttrsChecker->({
      src => $HTMLURIAttrChecker,
      type => $HTMLIMTAttrChecker,
      media => $HTMLMQAttrChecker,
    });
    $attrs_checker->($self, $item, $element_state);

    my $el = $item->{node};
    $self->{onerror}->(node => $el,
                       type => 'attribute missing:src')
        unless $el->has_attribute_ns (undef, 'src');
  },
};
2339    
## |canvas|: transparent content model; |height| and |width| each get
## their own non-negative integer checker, whose callback always returns
## true.
$Element->{$HTML_NS}{canvas} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_LC,
  check_attrs => $GetHTMLAttrsChecker->({
    map { ($_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 })) }
        qw/height width/
  }),
};
2348    
## |map|: prose content; the |id| attribute is required.  Besides the
## usual ID checks, the attribute is registered in |$self->{map}|.
$Element->{$HTML_NS}{map} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;
    my $has_id;
    ## NOTE: same as global |id=""|, with |$self->{map}| registration
    my $id_checker = sub {
      my ($self, $attr) = @_;
      my $id = $attr->value;
      if (not length $id) {
        ## NOTE: MUST contain at least one character
        $self->{onerror}->(node => $attr, type => 'empty attribute value');
      } elsif (my $same_id_attrs = $self->{id}->{$id}) {
        $self->{onerror}->(node => $attr, type => 'duplicate ID');
        push @$same_id_attrs, $attr;
      } else {
        $self->{id}->{$id} = [$attr];
      }
      $self->{onerror}->(node => $attr, type => 'space in ID')
          if $id =~ /[\x09-\x0D\x20]/;
      ## The first |map| with a given ID wins.
      $self->{map}->{$id} ||= $attr;
      $has_id = 1;
    };
    $GetHTMLAttrsChecker->({id => $id_checker})
        ->($self, $item, $element_state);
    $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id')
        unless $has_id;
  },
};
2382    
## |area| element definition (based on |%HTMLEmptyChecker|).
##
## |check_attrs| does three things:
##
##  1. Dispatches every attribute to a checker: the element-specific
##     checkers below first, then |$HTMLAttrChecker|, then
##     |$AttrChecker| (by namespace and local name, falling back to
##     the namespace-wide '' entry).  Attributes without any checker
##     are reported at the 'unsupported' level.
##  2. Checks the |href| / |alt| / link-attribute combinations.
##  3. Checks the number and range of the |coords| values for the
##     shape selected by |shape| (rectangle when |shape| is absent or
##     not recognized).
$Element->{$HTML_NS}->{area} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => sub {
    my ($self, $item, $element_state) = @_;

    ## Null-namespace attributes handled by the element-specific
    ## checkers, keyed by local name.
    my %attr;

    ## Array reference of the comma-separated |coords| items; left
    ## undefined when |coords| is absent or syntactically invalid.
    my $coords;

    ## Element-specific checkers for null-namespace attributes.  The
    ## table does not depend on the attribute being examined, so it is
    ## built once per element rather than once per attribute.
    my $area_attr_checker = {
      alt => sub { },
      ## NOTE: |alt| value has no conformance criteria.
      shape => $GetHTMLEnumeratedAttrChecker->({
        circ => -1, circle => 1,
        default => 1,
        poly => 1, polygon => -1,
        rect => 1, rectangle => -1,
      }),
      coords => sub {
        my ($self, $attr) = @_;
        my $value = $attr->value;
        if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
          $coords = [split /,/, $value];
        } else {
          $self->{onerror}->(node => $attr,
                             type => 'coords:syntax error');
        }
      },
      target => $HTMLTargetAttrChecker,
      href => $HTMLURIAttrChecker,
      ping => $HTMLSpaceURIsAttrChecker,
      rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
      media => $HTMLMQAttrChecker,
      hreflang => $HTMLLanguageTagAttrChecker,
      type => $HTMLIMTAttrChecker,
    };

    for my $attr (@{$item->{node}->attributes}) {
      my $attr_ns = $attr->namespace_uri;
      $attr_ns = '' unless defined $attr_ns;
      my $attr_ln = $attr->manakai_local_name;
      my $checker;
      if ($attr_ns eq '') {
        $checker = $area_attr_checker->{$attr_ln};
        if ($checker) {
          $attr{$attr_ln} = $attr;
        } else {
          $checker = $HTMLAttrChecker->{$attr_ln};
        }
      }
      $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
          || $AttrChecker->{$attr_ns}->{''};
      if ($checker) {
        ## A true non-reference entry means "known, nothing to check".
        $checker->($self, $attr) if ref $checker;
      } else {
        $self->{onerror}->(node => $attr, level => 'unsupported',
                           type => 'attribute');
        ## ISSUE: No conformance criteria for unknown attributes in the spec
      }
    }

    if (defined $attr{href}) {
      $self->{has_hyperlink_element} = 1;
      unless (defined $attr{alt}) {
        $self->{onerror}->(node => $item->{node},
                           type => 'attribute missing:alt');
      }
    } else {
      ## Without |href| none of the hyperlink attributes (nor |alt|)
      ## may be specified.
      for (qw/target ping rel media hreflang type alt/) {
        if (defined $attr{$_}) {
          $self->{onerror}->(node => $attr{$_},
                             type => 'attribute not allowed');
        }
      }
    }

    ## Canonical shape name; always one of |circle|, |default|,
    ## |polygon| or |rectangle|.
    my $shape = 'rectangle';
    if (defined $attr{shape}) {
      $shape = {
        circ => 'circle', circle => 'circle',
        default => 'default',
        poly => 'polygon', polygon => 'polygon',
        rect => 'rectangle', rectangle => 'rectangle',
      }->{lc $attr{shape}->value} || 'rectangle';
      ## TODO: ASCII lowercase?
    }

    if ($shape eq 'default') {
      ## The default shape takes no coordinates at all.
      if (defined $attr{coords}) {
        $self->{onerror}->(node => $attr{coords},
                           type => 'attribute not allowed');
      }
    } elsif (not defined $attr{coords}) {
      ## Every other shape requires |coords|.
      $self->{onerror}->(node => $item->{node},
                         type => 'attribute missing:coords');
    } elsif (not defined $coords) {
      ## NOTE: A syntax error has been reported.
    } elsif ($shape eq 'circle') {
      ## x, y, radius; the radius must not be negative.
      if (@$coords == 3) {
        if ($coords->[2] < 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:2');
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:3:'.@$coords);
      }
    } elsif ($shape eq 'polygon') {
      ## At least three x,y pairs.
      if (@$coords >= 6) {
        unless (@$coords % 2 == 0) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:number:even:'.@$coords);
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:>=6:'.@$coords);
      }
    } elsif ($shape eq 'rectangle') {
      ## x1, y1, x2, y2 with x1 < x2 and y1 < y2.
      if (@$coords == 4) {
        unless ($coords->[0] < $coords->[2]) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:0');
        }
        unless ($coords->[1] < $coords->[3]) {
          $self->{onerror}->(node => $attr{coords},
                             type => 'coords:out of range:1');
        }
      } else {
        $self->{onerror}->(node => $attr{coords},
                           type => 'coords:number:4:'.@$coords);
      }
    }
  },
};
2535     ## TODO: only in map
2536    
## |table| element definition (based on |%HTMLChecker|).
##
## Child elements are validated with a state machine whose state is
## kept in |$element_state->{phase}|:
##
##   'before caption'      nothing seen yet
##   'in colgroup'         after |caption| and/or |colgroup|s
##   'after thead'         after |thead|
##   'after leading tfoot' after a |tfoot| placed before the rows
##   'in tbodys'           within a run of |tbody| children
##   'in trs'              within a run of |tr| children
##   'after tfoot'         after a trailing |tfoot|; nothing more allowed
##
## |$element_state->{has_tfoot}| records that a |tfoot| has been seen,
## so that at most one |tfoot| child is accepted.
##
## A |tfoot| that precedes the row groups may be followed by either
## |tbody| children or |tr| children.  It therefore moves to its own
## 'after leading tfoot' phase rather than directly to 'in tbodys',
## which would reject a following |tr|.
$Element->{$HTML_NS}->{table} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    $element_state->{phase} = 'before caption';
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;
    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
      #
    } elsif ($element_state->{phase} eq 'in tbodys') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        #$element_state->{phase} = 'in tbodys';
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'in trs') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        #$element_state->{phase} = 'in trs';
      } elsif (not $element_state->{has_tfoot} and
               $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after leading tfoot') {
      ## The kind of the first row-level child selects the run; a
      ## second |tfoot| is rejected here and, because |has_tfoot| is
      ## set, in the 'in tbodys' / 'in trs' phases as well.
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after thead') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after leading tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'in colgroup') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
        $element_state->{phase} = 'after thead';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after leading tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'before caption') {
      if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
        $element_state->{phase} = 'in colgroup';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
        $element_state->{phase} = 'after thead';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
        $element_state->{phase} = 'in tbodys';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
        $element_state->{phase} = 'in trs';
      } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
        $element_state->{phase} = 'after leading tfoot';
        $element_state->{has_tfoot} = 1;
      } else {
        $self->{onerror}->(node => $child_el, type => 'element not allowed');
      }
    } elsif ($element_state->{phase} eq 'after tfoot') {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
    } else {
      die "check_child_element: Bad |table| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    ## Only insignificant text is allowed directly inside |table|.
    if ($has_significant) {
      $self->{onerror}->(node => $child_node, type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Table model errors
    require Whatpm::HTMLTable;
    Whatpm::HTMLTable->form_table ($item->{node}, sub {
      my %opt = @_;
      $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
    });

    ## Record the table node in the checker's result.
    push @{$self->{return}->{table}}, $item->{node};

    $HTMLChecker{check_end}->(@_);
  },
};
2642    
## |caption| element definition: the hooks of
## |%HTMLPhrasingContentChecker| with only |status| overridden.
$Element->{$HTML_NS}->{caption} = do {
  my %def = %HTMLPhrasingContentChecker;
  $def{status} = FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC;
  \%def;
};
2647    
## |colgroup| element definition (based on |%HTMLEmptyChecker|).
##
## Apart from elements covered by the checker's plus/minus element
## sets, only |col| children are accepted, and text children must not
## be significant.
$Element->{$HTML_NS}->{colgroup} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    ## The callback accepts values greater than zero only.
    span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }),
      ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
      ## TODO: "attribute not supported" if |col|.
      ## ISSUE: MUST NOT if any |col|?
      ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
  }),
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }

    ## Explicitly permitted by the current "plus" set.
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## The only regular child.
    return if $child_nsuri eq $HTML_NS and $child_ln eq 'col';

    $self->{onerror}->(node => $child_el, type => 'element not allowed');
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node, type => 'character not allowed');
  },
};
2680    
## |col| element definition: the hooks of |%HTMLEmptyChecker| plus a
## |span| attribute checker whose callback accepts values greater
## than zero only.
$Element->{$HTML_NS}->{col} = do {
  my $span_checker
      = $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 });

  my %def = %HTMLEmptyChecker;
  $def{status} = FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC;
  $def{check_attrs} = $GetHTMLAttrsChecker->({span => $span_checker});
  \%def;
};
2688    
## |tbody| element definition (based on |%HTMLChecker|); also copied
## by the |thead| and |tfoot| definitions.
##
## Apart from elements covered by the plus/minus element sets, only
## |tr| children are accepted.  |check_end| reports
## 'child element missing:tr' when no |tr| child was seen.
$Element->{$HTML_NS}->{tbody} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    unless ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
      return;
    }

    ## Remember the row for the check in |check_end|.
    $element_state->{has_tr} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node, type => 'character not allowed');
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:tr')
        unless $element_state->{has_tr};

    $HTMLChecker{check_end}->(@_);
  },
};
2723    
## |thead| element definition: a shallow copy of the |tbody|
## definition (which must already be registered at this point) with
## |status| set again.
$Element->{$HTML_NS}->{thead} = do {
  my %def = %{$Element->{$HTML_NS}->{tbody}};
  $def{status} = FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC;
  \%def;
};
2728    
## |tfoot| element definition: a shallow copy of the |tbody|
## definition (which must already be registered at this point) with
## |status| set again.
$Element->{$HTML_NS}->{tfoot} = do {
  my %def = %{$Element->{$HTML_NS}->{tbody}};
  $def{status} = FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC;
  \%def;
};
2733    
## |tr| element definition (based on |%HTMLChecker|).
##
## Apart from elements covered by the plus/minus element sets, only
## |td| and |th| children are accepted.  |check_end| reports
## 'child element missing:td|th' when no cell was seen.
$Element->{$HTML_NS}->{tr} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    my $is_cell = ($child_nsuri eq $HTML_NS and
                   ($child_ln eq 'td' or $child_ln eq 'th'));
    unless ($is_cell) {
      $self->{onerror}->(node => $child_el, type => 'element not allowed');
      return;
    }

    ## Remember the cell for the check in |check_end|.
    $element_state->{has_cell} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;
    $self->{onerror}->(node => $child_node, type => 'character not allowed');
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'child element missing:td|th')
        unless $element_state->{has_cell};

    $HTMLChecker{check_end}->(@_);
  },
};
2769    
## |td| element definition: the hooks of |%HTMLProseContentChecker|
## plus |colspan| / |rowspan| checkers.  Each attribute gets its own
## checker whose callback accepts values greater than zero only.
$Element->{$HTML_NS}->{td} = do {
  my %span_checker = map {
    ($_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }))
  } qw/colspan rowspan/;

  my %def = %HTMLProseContentChecker;
  $def{status} = FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC;
  $def{check_attrs} = $GetHTMLAttrsChecker->(\%span_checker);
  \%def;
};
2778    
## |th| element definition: the hooks of |%HTMLPhrasingContentChecker|
## plus |colspan| / |rowspan| checkers (callback accepts values
## greater than zero only) and an enumerated |scope| checker.
$Element->{$HTML_NS}->{th} = do {
  my %th_attr_checker = map {
    ($_ => $GetHTMLNonNegativeIntegerAttrChecker->(sub { $_[0] > 0 }))
  } qw/colspan rowspan/;
  $th_attr_checker{scope} = $GetHTMLEnumeratedAttrChecker->({
    row => 1, col => 1, rowgroup => 1, colgroup => 1,
  });

  my %def = %HTMLPhrasingContentChecker;
  $def{status} = FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC;
  $def{check_attrs} = $GetHTMLAttrsChecker->(\%th_attr_checker);
  \%def;
};
2789    
2790     ## TODO: forms
2791 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2792 wakaba 1.1
## |script| element definition (based on |%HTMLChecker|).
##
## With a |src| attribute the element must be empty
## (|$element_state->{must_be_empty}|).  Otherwise the effective
## script type is stored in |$element_state->{script_type}| and
## reported at the 'unsupported' level from |check_end|.
$Element->{$HTML_NS}->{script} = {
  %HTMLChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_attrs => $GetHTMLAttrsChecker->({
    src => $HTMLURIAttrChecker,
    defer => $GetHTMLBooleanAttrChecker->('defer'),
    async => $GetHTMLBooleanAttrChecker->('async'),
    type => $HTMLIMTAttrChecker,
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $script_el = $item->{node};

    if ($script_el->has_attribute_ns (undef, 'src')) {
      $element_state->{must_be_empty} = 1;
      return;
    }

    ## NOTE: No content model conformance in HTML5 spec.
    my $type_value = $script_el->get_attribute_ns (undef, 'type');
    my $language_value = $script_el->get_attribute_ns (undef, 'language');

    ## An empty |type| or |language|, or the absence of both, selects
    ## the default; otherwise |type| takes precedence over |language|.
    my $script_type = 'text/javascript';
    my $has_empty_hint = ((defined $type_value and $type_value eq '') or
                          (defined $language_value and
                           $language_value eq ''));
    unless ($has_empty_hint) {
      if (defined $type_value) {
        $script_type = $type_value;
      } elsif (defined $language_value) {
        $script_type = 'text/' . $language_value;
      }
    }
    $element_state->{script_type} = $script_type; ## TODO: $type normalization
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    ## Child elements are only rejected for external scripts.
    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed')
        if $element_state->{must_be_empty};
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant and $element_state->{must_be_empty};
    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed');
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    ## Nothing is done for an external script; in particular the
    ## base |check_end| hook is not invoked in that case.
    return if $element_state->{must_be_empty};

    $self->{onerror}->(node => $item->{node}, level => 'unsupported',
                       type => 'script:'.$element_state->{script_type});
    ## TODO: text/javascript support

    $HTMLChecker{check_end}->(@_);
  },
};
2859 wakaba 1.25 ## ISSUE: Significant check and text child node
2860 wakaba 1.1
2861     ## NOTE: When script is disabled.
## |noscript| element definition (based on |%HTMLTransparentChecker|).
##
## The behaviour depends on |$self->{flag}->{in_head}|:
##  - when the flag is set, only |link|, |style| without |scoped| and
##    |meta| with an |http-equiv| other than |content-type| are
##    accepted, and significant text is rejected;
##  - otherwise the element is transparent, except that nested
##    |noscript| elements are added to the minus element set.
$Element->{$HTML_NS}->{noscript} = {
  %HTMLTransparentChecker,
  status => FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC,
  check_start => sub {
    my ($self, $item, $element_state) = @_;
    my $noscript_el = $item->{node};

    $self->{onerror}->(node => $noscript_el, type => 'in XML:noscript')
        unless $noscript_el->owner_document->manakai_is_html;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {noscript => 1}})
        unless $self->{flag}->{in_head};
  },
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    ## Outside |head| the generic transparent rules apply.
    return $HTMLTransparentChecker{check_child_element}->(@_)
        unless $self->{flag}->{in_head};

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    my $is_allowed = 0;
    if ($child_nsuri eq $HTML_NS) {
      if ($child_ln eq 'link') {
        $is_allowed = 1;
      } elsif ($child_ln eq 'style') {
        $is_allowed = 1 unless $child_el->has_attribute_ns (undef, 'scoped');
      } elsif ($child_ln eq 'meta') {
        my $http_equiv_attr
            = $child_el->get_attribute_node_ns (undef, 'http-equiv');
        ## TODO: case
        $is_allowed = 1
            if $http_equiv_attr and
               lc ($http_equiv_attr->value) ne 'content-type';
      }
    }

    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:head noscript',
                       level => $self->{must_level})
        unless $is_allowed;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;

    return $HTMLTransparentChecker{check_child_text}->(@_)
        unless $self->{flag}->{in_head};

    $self->{onerror}->(node => $child_node,
                       type => 'character not allowed')
        if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $finish = $self->{flag}->{in_head}
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $finish->(@_);
  },
};
2942 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2943 wakaba 1.1
## |event-source| element definition: the hooks of |%HTMLEmptyChecker|
## plus a URI checker for the |src| attribute.
$Element->{$HTML_NS}->{'event-source'} = do {
  my %def = %HTMLEmptyChecker;
  $def{status} = FEATURE_HTML5_LC;
  $def{check_attrs} = $GetHTMLAttrsChecker->({src => $HTMLURIAttrChecker});
  \%def;
};
2951    
## |details| element definition (based on |%HTMLProseContentChecker|).
##
## Content model: |legend|, then prose.  State kept in
## |$element_state|:
##   |has_legend|      a |legend| child has been seen;
##   |has_non_legend|  content has been seen after which a |legend|
##                     child is no longer allowed.
$Element->{$HTML_NS}->{details} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    open => $GetHTMLBooleanAttrChecker->('open'),
  }),
  ## NOTE: legend, Prose
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      $element_state->{has_non_legend} = 1;
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    unless ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
      $HTMLProseContentChecker{check_child_element}->(@_);
      $element_state->{has_non_legend} = 1 unless $child_is_transparent;
      ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
      ## is conforming?
      return;
    }

    ## A |legend| is only allowed before any other content (which
    ## includes an earlier |legend|).
    $self->{onerror}->(node => $child_el,
                       type => 'element not allowed:details legend',
                       level => $self->{must_level})
        if $element_state->{has_non_legend};
    $element_state->{has_legend} = 1;
    $element_state->{has_non_legend} = 1;
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    $element_state->{has_non_legend} = 1 if $has_significant;
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;

    $self->{onerror}->(node => $item->{node},
                       type => 'element missing:legend',
                       level => $self->{must_level})
        unless $element_state->{has_legend};

    $HTMLProseContentChecker{check_end}->(@_);
    ## ISSUE: |<details><legend>aa</legend></details>| error?
  },
};
3003    
## |datagrid| element definition (based on |%HTMLProseContentChecker|).
##
## |a| and |datagrid| are added to the minus element set for the
## element's content.  Children are tracked in
## |$element_state->{phase}|:
##   'any'    nothing seen yet;
##   'prose'  prose content has started (a |table| is rejected while
##            no child element has been recorded in |has_element|);
##   'none'   a single |table|, |select| or |datalist| was seen and
##            nothing further is allowed.
$Element->{$HTML_NS}->{datagrid} = {
  %HTMLProseContentChecker,
  status => FEATURE_HTML5_WD,
  check_attrs => $GetHTMLAttrsChecker->({
    disabled => $GetHTMLBooleanAttrChecker->('disabled'),
    multiple => $GetHTMLBooleanAttrChecker->('multiple'),
  }),
  check_start => sub {
    my ($self, $item, $element_state) = @_;

    $self->_add_minus_elements ($element_state,
                                {$HTML_NS => {a => 1, datagrid => 1}});
    $element_state->{phase} = 'any';
  },
  ## Prose -(text* table Prose*) | table | select | datalist | Empty
  check_child_element => sub {
    my ($self, $item, $child_el, $child_nsuri, $child_ln,
        $child_is_transparent, $element_state) = @_;

    if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
      return;
    }
    return if $self->{plus_elements}->{$child_nsuri}->{$child_ln};

    my $phase = $element_state->{phase};
    if ($phase eq 'prose') {
      my $rejected = 1;
      if ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
        ## A |table| as the first child element of prose content is
        ## not allowed.
        $rejected = (not $element_state->{has_element} and
                     $child_nsuri eq $HTML_NS and
                     $child_ln eq 'table');
      }
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed')
          if $rejected;
      $element_state->{has_element} = 1;
    } elsif ($phase eq 'any') {
      if ($child_nsuri eq $HTML_NS and
          {table => 1, select => 1, datalist => 1}->{$child_ln}) {
        $element_state->{phase} = 'none';
      } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
        $element_state->{has_element} = 1;
        $element_state->{phase} = 'prose';
        ## TODO: transparent?
      } else {
        $self->{onerror}->(node => $child_el,
                           type => 'element not allowed');
      }
    } elsif ($phase eq 'none') {
      $self->{onerror}->(node => $child_el,
                         type => 'element not allowed');
    } else {
      die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
    }
  },
  check_child_text => sub {
    my ($self, $item, $child_node, $has_significant, $element_state) = @_;
    return unless $has_significant;

    my $phase = $element_state->{phase};
    return if $phase eq 'prose';

    if ($phase eq 'any') {
      ## Significant text starts prose content.
      $element_state->{phase} = 'prose';
    } else {
      $self->{onerror}->(node => $child_node,
                         type => 'character not allowed');
    }
  },
  check_end => sub {
    my ($self, $item, $element_state) = @_;
    $self->_remove_minus_elements ($element_state);

    my $finish = $element_state->{phase} eq 'none'
        ? $HTMLChecker{check_end}
        : $HTMLPhrasingContentChecker{check_end};
    $finish->(@_);
  },
    ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
    ## are not disallowed (assuming that form control contents are also
    ## prose content).
};
3089    
## |command| element definition (based on |%HTMLEmptyChecker|).
##
## |checked|, |default|, |disabled| and |hidden| are boolean
## attributes, |icon| is a URI, |label| and |radiogroup| accept any
## value, and |type| must be exactly one of |command|, |checkbox| or
## |radio|.
$Element->{$HTML_NS}->{command} = do {
  my %command_attr_checker = map {
    ($_ => $GetHTMLBooleanAttrChecker->($_))
  } qw/checked default disabled hidden/;

  $command_attr_checker{icon} = $HTMLURIAttrChecker;
  $command_attr_checker{label} = sub { }; ## NOTE: No conformance criteria
  $command_attr_checker{radiogroup} = sub { }; ## NOTE: No conformance criteria
  ## NOTE: |title| has special semantics, but no syntactical difference
  $command_attr_checker{type} = sub {
    my ($self, $attr) = @_;
    my $value = $attr->value;
    return if {command => 1, checkbox => 1, radio => 1}->{$value};
    $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
  };

  my %def = %HTMLEmptyChecker;
  $def{status} = FEATURE_HTML5_WD;
  $def{check_attrs} = $GetHTMLAttrsChecker->(\%command_attr_checker);
  \%def;
};
3111    
3112     $Element->{$HTML_NS}->{menu} = {
3113 wakaba 1.40 %HTMLPhrasingContentChecker,
3114 wakaba 1.48 status => FEATURE_HTML5_WD,
3115     ## NOTE: HTML4 Deprecated; Reintroduced in HTML5 with different semantics
3116 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
3117 wakaba 1.1 autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3118     id => sub {
3119     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3120     my ($self, $attr) = @_;
3121     my $value = $attr->value;
3122     if (length $value > 0) {
3123     if ($self->{id}->{$value}) {
3124     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3125     push @{$self->{id}->{$value}}, $attr;
3126     } else {
3127     $self->{id}->{$value} = [$attr];
3128     }
3129     } else {
3130     ## NOTE: MUST contain at least one character
3131     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3132     }
3133     if ($value =~ /[\x09-\x0D\x20]/) {
3134     $self->{onerror}->(node => $attr, type => 'space in ID');
3135     }
3136     $self->{menu}->{$value} ||= $attr;
3137     ## ISSUE: <menu id=""><p contextmenu=""> match?
3138     },
3139     label => sub { }, ## NOTE: No conformance creteria
3140     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3141     }),
3142 wakaba 1.40 check_start => sub {
3143     my ($self, $item, $element_state) = @_;
3144     $element_state->{phase} = 'li or phrasing';
3145     $element_state->{in_menu_original} = $self->{flag}->{in_menu};
3146     $self->{flag}->{in_menu} = 1;
3147     },
check_child_element => sub {
  my ($self, $item, $child, $ns, $name,
      $child_is_transparent, $state) = @_;

  if ($self->{minus_elements}->{$ns}->{$name}) {
    ## Explicitly excluded in the current context.
    $self->{onerror}->(node => $child,
                       type => 'element not allowed:minus',
                       level => $self->{must_level});
  } elsif ($self->{plus_elements}->{$ns}->{$name}) {
    ## Explicitly permitted in the current context; nothing to check.
  } else {
    ## Work out which content model this child belongs to: an HTML |li|
    ## selects 'li', a phrasing content element selects 'phrasing', and
    ## anything else selects neither.
    my $model = ($ns eq $HTML_NS and $name eq 'li')      ? 'li'
              : $HTMLPhrasingContent->{$ns}->{$name}     ? 'phrasing'
              :                                            undef;

    if (not defined $model) {
      $self->{onerror}->(node => $child, type => 'element not allowed');
    } elsif ($state->{phase} eq $model) {
      ## Consistent with the children seen so far.
    } elsif ($state->{phase} eq 'li or phrasing') {
      ## First deciding child: commit to its content model.
      $state->{phase} = $model;
    } else {
      ## The other content model has already been chosen.
      $self->{onerror}->(node => $child, type => 'element not allowed');
    }
  }
},
check_child_text => sub {
  my ($self, $item, $text_node, $has_significant, $state) = @_;

  ## Text without significant characters never affects the phase.
  return unless $has_significant;

  my $phase = $state->{phase};
  if ($phase eq 'li or phrasing') {
    ## Significant text commits the element to phrasing content.
    $state->{phase} = 'phrasing';
  } elsif ($phase ne 'phrasing') {
    ## Another phase has already been chosen, so text is not allowed.
    $self->{onerror}->(node => $text_node,
                       type => 'character not allowed');
  }
},
check_end => sub {
  my ($self, $item, $state) = @_;

  ## Drop the |in_menu| flag again unless it was already set before this
  ## element started.
  if (not $state->{in_menu_original}) {
    delete $self->{flag}->{in_menu};
  }

  ## Phase 'li' finishes with the generic checker; 'phrasing' and the
  ## undecided 'li or phrasing' finish with the phrasing content checker.
  my $finish = $state->{phase} eq 'li'
      ? $HTMLChecker{check_end}
      : $HTMLPhrasingContentChecker{check_end};
  $finish->(@_);
},
3200 wakaba 1.8 };
3201    
## |datatemplate|: apart from elements on the current plus list, the only
## child element accepted is an HTML |rule|; significant text is rejected.
$Element->{$HTML_NS}->{datatemplate} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,
  is_xml_root => 1,

  check_child_element => sub {
    my ($self, $item, $child, $ns, $name,
        $child_is_transparent, $state) = @_;

    if ($self->{minus_elements}->{$ns}->{$name}) {
      ## Explicitly excluded in the current context.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:minus',
                         level => $self->{must_level});
    } elsif (not ($self->{plus_elements}->{$ns}->{$name}
                  or ($ns eq $HTML_NS and $name eq 'rule'))) {
      ## Neither explicitly permitted nor a |rule| element.
      $self->{onerror}->(node => $child,
                         type => 'element not allowed:datatemplate');
    }
  },

  check_child_text => sub {
    my ($self, $item, $text_node, $has_significant, $state) = @_;
    $self->{onerror}->(node => $text_node, type => 'character not allowed')
        if $has_significant;
  },
};
3229    
## |rule|: its content is not checked at all, and |nest| is added to the
## plus elements for as long as the element is open.
$Element->{$HTML_NS}->{rule} = {
  %HTMLChecker,
  status => FEATURE_HTML5_AT_RISK,

  check_attrs => $GetHTMLAttrsChecker->({
    condition => $HTMLSelectorsAttrChecker,
    mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
  }),

  check_start => sub {
    my ($self, $item, $state) = @_;
    my $extra = {$HTML_NS => {nest => 1}};
    $self->_add_plus_elements ($state, $extra);
  },

  ## NOTE: "MAY be anything that, when the parent |datatemplate|
  ## is applied to some conforming data, results in a conforming DOM tree.":
  ## We don't check against this, hence the two no-op checkers.
  check_child_element => sub { },
  check_child_text => sub { },

  check_end => sub {
    my ($self, $item, $state) = @_;
    ## Undo the |check_start| registration before the generic end check.
    $self->_remove_plus_elements ($state);
    return $HTMLChecker{check_end}->(@_);
  },
};
3252    
## |nest|: built on the empty-element checker, with checkers for the
## |filter| and |mode| attributes.
$Element->{$HTML_NS}->{nest} = {
  %HTMLEmptyChecker,
  status => FEATURE_HTML5_AT_RISK,
  check_attrs => $GetHTMLAttrsChecker->({
    filter => $HTMLSelectorsAttrChecker,

    ## |mode| has to be non-empty and must not contain any character in
    ## U+0009..U+000D or U+0020.
    mode => sub {
      my ($self, $mode_attr) = @_;
      unless ($mode_attr->value =~ /\A[^\x09-\x0D\x20]+\z/) {
        $self->{onerror}->(node => $mode_attr, type => 'mode:syntax error');
      }
    },
  }),
};
3267    
## |legend|: uses the phrasing content checker unchanged; only the status
## is set, to the HTML5 default status OR'ed with the HTML4 REC status.
$Element->{$HTML_NS}{legend} = {
  %HTMLPhrasingContentChecker,
  status => (FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC),
};
3272    
## |div|: uses the prose content checker unchanged; only the status is
## set, to the HTML5 default status OR'ed with the HTML4 REC status.
$Element->{$HTML_NS}{div} = {
  %HTMLProseContentChecker,
  status => (FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC),
};
3277    
## |font|: uses the transparent checker, with the HTML5 default status
## OR'ed with the HTML4 REC status.
$Element->{$HTML_NS}{font} = {
  %HTMLTransparentChecker,
  status => (FEATURE_HTML5_DEFAULT | FEATURE_HTML4_REC),
  ## TODO: No element-specific attribute checkers are registered yet.
  check_attrs => $GetHTMLAttrsChecker->({}),
};
3283    
## Set the |loaded| flag for the HTML namespace in the namespace table of
## |Whatpm::ContentChecker|.
$Whatpm::ContentChecker::Namespace->{$HTML_NS}{loaded} = 1;

## A module file has to end with a true value.
1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24