/[suikacvs]/markup/html/whatpm/Whatpm/ContentChecker/HTML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/ContentChecker/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.47 - (hide annotations) (download)
Sun Feb 24 01:08:14 2008 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.46: +11 -15 lines
++ whatpm/t/ChangeLog	24 Feb 2008 01:08:10 -0000
2008-02-24  Wakaba  <wakaba@suika.fam.cx>

	* content-model-1.dat: Some test results on |meta|
	in |noscript| are updated.

++ whatpm/Whatpm/ContentChecker/ChangeLog	24 Feb 2008 01:07:43 -0000
	* HTML.pm: Invalid |meta| in |noscript| should be erred
	as usual.

2008-02-24  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::ContentChecker;
2     use strict;
3     require Whatpm::ContentChecker;
4    
5     my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
6    
7 wakaba 1.29 ## December 2007 HTML5 Classification
8    
9     my $HTMLMetadataContent = {
10     $HTML_NS => {
11     title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1,
12     'event-source' => 1, command => 1, datatemplate => 1,
13     ## NOTE: A |meta| with no |name| element is not allowed as
14     ## a metadata content other than |head| element.
15     meta => 1,
16     },
17     ## NOTE: RDF is mentioned in the HTML5 spec.
18     ## TODO: Other RDF elements?
19     q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1},
20     };
21    
22     my $HTMLProseContent = {
23     $HTML_NS => {
24     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
25     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
26     footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1,
27     ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1,
28     details => 1, ## ISSUE: "Prose element" in spec.
29     datagrid => 1, ## ISSUE: "Prose element" in spec.
30     datatemplate => 1,
31     div => 1, ## ISSUE: No category in spec.
32     ## NOTE: |style| is only allowed if |scoped| attribute is specified.
33     ## Additionally, it must be before any other element or
34     ## non-inter-element-whitespace text node.
35     style => 1,
36    
37 wakaba 1.38 br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
38 wakaba 1.29 dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
39     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
40     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
41     command => 1, font => 1,
42     a => 1,
43     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
44     ## NOTE: |area| is allowed only as a descendant of |map|.
45     area => 1,
46    
47     ins => 1, del => 1,
48    
49     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
50     menu => 1,
51    
52     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
53     canvas => 1,
54     },
55    
56     ## NOTE: Embedded
57     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
58     q<http://www.w3.org/2000/svg> => {svg => 1},
59     };
60    
61     my $HTMLSectioningContent = {
62     $HTML_NS => {
63     section => 1, nav => 1, article => 1, blockquote => 1, aside => 1,
64     ## NOTE: |body| is only allowed in |html| element.
65     body => 1,
66     },
67     };
68    
69     my $HTMLHeadingContent = {
70     $HTML_NS => {
71     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1,
72     },
73     };
74    
75     my $HTMLPhrasingContent = {
76     ## NOTE: All phrasing content is also prose content.
77     $HTML_NS => {
78 wakaba 1.38 br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1,
79 wakaba 1.29 dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1,
80     var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1,
81     b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1,
82     command => 1, font => 1,
83     a => 1,
84     datagrid => 1, ## ISSUE: "Interactive element" in the spec.
85     ## NOTE: |area| is allowed only as a descendant of |map|.
86     area => 1,
87    
88     ## NOTE: Transparent.
89     ins => 1, del => 1,
90    
91     ## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose.
92     menu => 1,
93    
94     img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1,
95     canvas => 1,
96     },
97    
98     ## NOTE: Embedded
99     q<http://www.w3.org/1998/Math/MathML> => {math => 1},
100     q<http://www.w3.org/2000/svg> => {svg => 1},
101    
102     ## NOTE: And non-inter-element-whitespace text nodes.
103     };
104    
105 wakaba 1.40 ## $HTMLEmbeddedContent: See Whatpm::ContentChecker.
106 wakaba 1.29
107     my $HTMLInteractiveContent = {
108     $HTML_NS => {
109     a => 1,
110 wakaba 1.36 datagrid => 1, ## ISSUE: Categorized as "Inetractive element"
111 wakaba 1.29 },
112     };
113    
114 wakaba 1.36 ## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker.
115     ## NOTE: Semi-transparent elements: See Whatpm::ContentChecker.
116    
117     ## -- Common attribute syntacx checkers
118    
119 wakaba 1.1 our $AttrChecker;
120    
121     my $GetHTMLEnumeratedAttrChecker = sub {
122     my $states = shift; # {value => conforming ? 1 : -1}
123     return sub {
124     my ($self, $attr) = @_;
125     my $value = lc $attr->value; ## TODO: ASCII case insensitibility?
126     if ($states->{$value} > 0) {
127     #
128     } elsif ($states->{$value}) {
129     $self->{onerror}->(node => $attr, type => 'enumerated:non-conforming');
130     } else {
131     $self->{onerror}->(node => $attr, type => 'enumerated:invalid');
132     }
133     };
134     }; # $GetHTMLEnumeratedAttrChecker
135    
136     my $GetHTMLBooleanAttrChecker = sub {
137     my $local_name = shift;
138     return sub {
139     my ($self, $attr) = @_;
140     my $value = $attr->value;
141     unless ($value eq $local_name or $value eq '') {
142     $self->{onerror}->(node => $attr, type => 'boolean:invalid');
143     }
144     };
145     }; # $GetHTMLBooleanAttrChecker
146    
147 wakaba 1.8 ## Unordered set of space-separated tokens
148 wakaba 1.18 my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub {
149 wakaba 1.8 my ($self, $attr) = @_;
150     my %word;
151     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
152     unless ($word{$word}) {
153     $word{$word} = 1;
154     } else {
155     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
156     }
157     }
158 wakaba 1.18 }; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker
159 wakaba 1.8
160 wakaba 1.1 ## |rel| attribute (unordered set of space separated tokens,
161     ## whose allowed values are defined by the section on link types)
162     my $HTMLLinkTypesAttrChecker = sub {
163 wakaba 1.4 my ($a_or_area, $todo, $self, $attr) = @_;
164 wakaba 1.1 my %word;
165     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
166     unless ($word{$word}) {
167     $word{$word} = 1;
168 wakaba 1.18 } elsif ($word eq 'up') {
169     #
170 wakaba 1.1 } else {
171     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
172     }
173     }
174     ## NOTE: Case sensitive match (since HTML5 spec does not say link
175     ## types are case-insensitive and it says "The value should not
176     ## be confusingly similar to any other defined value (e.g.
177     ## differing only in case).").
178     ## NOTE: Though there is no explicit "MUST NOT" for undefined values,
179     ## "MAY"s and "only ... MAY" restrict non-standard non-registered
180     ## values to be used conformingly.
181     require Whatpm::_LinkTypeList;
182     our $LinkType;
183     for my $word (keys %word) {
184     my $def = $LinkType->{$word};
185     if (defined $def) {
186     if ($def->{status} eq 'accepted') {
187     if (defined $def->{effect}->[$a_or_area]) {
188     #
189     } else {
190     $self->{onerror}->(node => $attr,
191     type => 'link type:bad context:'.$word);
192     }
193     } elsif ($def->{status} eq 'proposal') {
194     $self->{onerror}->(node => $attr, level => 's',
195     type => 'link type:proposed:'.$word);
196 wakaba 1.20 if (defined $def->{effect}->[$a_or_area]) {
197     #
198     } else {
199     $self->{onerror}->(node => $attr,
200     type => 'link type:bad context:'.$word);
201     }
202 wakaba 1.1 } else { # rejected or synonym
203     $self->{onerror}->(node => $attr,
204     type => 'link type:non-conforming:'.$word);
205     }
206 wakaba 1.4 if (defined $def->{effect}->[$a_or_area]) {
207     if ($word eq 'alternate') {
208     #
209     } elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') {
210     $todo->{has_hyperlink_link_type} = 1;
211     }
212     }
213 wakaba 1.1 if ($def->{unique}) {
214     unless ($self->{has_link_type}->{$word}) {
215     $self->{has_link_type}->{$word} = 1;
216     } else {
217     $self->{onerror}->(node => $attr,
218     type => 'link type:duplicate:'.$word);
219     }
220     }
221     } else {
222     $self->{onerror}->(node => $attr, level => 'unsupported',
223     type => 'link type:'.$word);
224     }
225     }
226 wakaba 1.4 $todo->{has_hyperlink_link_type} = 1
227     if $word{alternate} and not $word{stylesheet};
228 wakaba 1.1 ## TODO: The Pingback 1.0 specification, which is referenced by HTML5,
229     ## says that using both X-Pingback: header field and HTML
230     ## <link rel=pingback> is deprecated and if both appears they
231     ## SHOULD contain exactly the same value.
232     ## ISSUE: Pingback 1.0 specification defines the exact representation
233     ## of its link element, which cannot be tested by the current arch.
234     ## ISSUE: Pingback 1.0 specification says that the document MUST NOT
235     ## include any string that matches to the pattern for the rel=pingback link,
236     ## which again inpossible to test.
237     ## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4.
238 wakaba 1.12
239     ## NOTE: <link rel="up index"><link rel="up up index"> is not an error.
240 wakaba 1.17 ## NOTE: We can't check "If the page is part of multiple hierarchies,
241     ## then they SHOULD be described in different paragraphs.".
242 wakaba 1.1 }; # $HTMLLinkTypesAttrChecker
243 wakaba 1.20
244     ## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status."
245 wakaba 1.1
246     ## URI (or IRI)
247     my $HTMLURIAttrChecker = sub {
248     my ($self, $attr) = @_;
249     ## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.)
250     my $value = $attr->value;
251     Whatpm::URIChecker->check_iri_reference ($value, sub {
252     my %opt = @_;
253     $self->{onerror}->(node => $attr, level => $opt{level},
254     type => 'URI::'.$opt{type}.
255     (defined $opt{position} ? ':'.$opt{position} : ''));
256     });
257 wakaba 1.17 $self->{has_uri_attr} = 1; ## TODO: <html manifest>
258 wakaba 1.1 }; # $HTMLURIAttrChecker
259    
260     ## A space separated list of one or more URIs (or IRIs)
261     my $HTMLSpaceURIsAttrChecker = sub {
262     my ($self, $attr) = @_;
263     my $i = 0;
264     for my $value (split /[\x09-\x0D\x20]+/, $attr->value) {
265     Whatpm::URIChecker->check_iri_reference ($value, sub {
266     my %opt = @_;
267     $self->{onerror}->(node => $attr, level => $opt{level},
268 wakaba 1.2 type => 'URIs:'.':'.
269     $opt{type}.':'.$i.
270 wakaba 1.1 (defined $opt{position} ? ':'.$opt{position} : ''));
271     });
272     $i++;
273     }
274     ## ISSUE: Relative references?
275     ## ISSUE: Leading or trailing white spaces are conformant?
276     ## ISSUE: A sequence of white space characters are conformant?
277     ## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.)
278     ## NOTE: Duplication seems not an error.
279 wakaba 1.4 $self->{has_uri_attr} = 1;
280 wakaba 1.1 }; # $HTMLSpaceURIsAttrChecker
281    
282     my $HTMLDatetimeAttrChecker = sub {
283     my ($self, $attr) = @_;
284     my $value = $attr->value;
285     ## ISSUE: "space", not "space character" (in parsing algorihtm, "space character")
286     if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) {
287     my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm)
288     = ($1, $2, $3, $4, $5, $6, $7, $8, $9);
289     if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm)
290     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
291     if $d < 1 or
292     $d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M];
293     $self->{onerror}->(node => $attr, type => 'datetime:bad day')
294     if $M == 2 and $d == 29 and
295     not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0));
296     } else {
297     $self->{onerror}->(node => $attr, type => 'datetime:bad month');
298     }
299     $self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23;
300     $self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59;
301     $self->{onerror}->(node => $attr, type => 'datetime:bad second')
302     if defined $s and $s > 59;
303     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour')
304     if $zh > 23;
305     $self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute')
306     if $zm > 59;
307     ## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339.
308     } else {
309     $self->{onerror}->(node => $attr, type => 'datetime:syntax error');
310     }
311     }; # $HTMLDatetimeAttrChecker
312    
313     my $HTMLIntegerAttrChecker = sub {
314     my ($self, $attr) = @_;
315     my $value = $attr->value;
316     unless ($value =~ /\A-?[0-9]+\z/) {
317     $self->{onerror}->(node => $attr, type => 'integer:syntax error');
318     }
319     }; # $HTMLIntegerAttrChecker
320    
321     my $GetHTMLNonNegativeIntegerAttrChecker = sub {
322     my $range_check = shift;
323     return sub {
324     my ($self, $attr) = @_;
325     my $value = $attr->value;
326     if ($value =~ /\A[0-9]+\z/) {
327     unless ($range_check->($value + 0)) {
328     $self->{onerror}->(node => $attr, type => 'nninteger:out of range');
329     }
330     } else {
331     $self->{onerror}->(node => $attr,
332     type => 'nninteger:syntax error');
333     }
334     };
335     }; # $GetHTMLNonNegativeIntegerAttrChecker
336    
337     my $GetHTMLFloatingPointNumberAttrChecker = sub {
338     my $range_check = shift;
339     return sub {
340     my ($self, $attr) = @_;
341     my $value = $attr->value;
342     if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) {
343     unless ($range_check->($value + 0)) {
344     $self->{onerror}->(node => $attr, type => 'float:out of range');
345     }
346     } else {
347     $self->{onerror}->(node => $attr,
348     type => 'float:syntax error');
349     }
350     };
351     }; # $GetHTMLFloatingPointNumberAttrChecker
352    
353     ## "A valid MIME type, optionally with parameters. [RFC 2046]"
354     ## ISSUE: RFC 2046 does not define syntax of media types.
355     ## ISSUE: The definition of "a valid MIME type" is unknown.
356     ## Syntactical correctness?
357     my $HTMLIMTAttrChecker = sub {
358     my ($self, $attr) = @_;
359     my $value = $attr->value;
360     ## ISSUE: RFC 2045 Content-Type header field allows insertion
361     ## of LWS/comments between tokens. Is it allowed in HTML? Maybe no.
362     ## ISSUE: RFC 2231 extension? Maybe no.
363     my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/;
364     my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
365     my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/;
366     if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) {
367     my @type = ($1, $2);
368     my $param = $3;
369     while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) {
370     if (defined $2) {
371     push @type, $1 => $2;
372     } else {
373     my $n = $1;
374     my $v = $2;
375     $v =~ s/\\(.)/$1/gs;
376     push @type, $n => $v;
377     }
378     }
379     require Whatpm::IMTChecker;
380     Whatpm::IMTChecker->check_imt (sub {
381     my %opt = @_;
382     $self->{onerror}->(node => $attr, level => $opt{level},
383     type => 'IMT:'.$opt{type});
384     }, @type);
385     } else {
386     $self->{onerror}->(node => $attr, type => 'IMT:syntax error');
387     }
388     }; # $HTMLIMTAttrChecker
389    
390     my $HTMLLanguageTagAttrChecker = sub {
391 wakaba 1.7 ## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm.
392    
393 wakaba 1.1 my ($self, $attr) = @_;
394 wakaba 1.6 my $value = $attr->value;
395     require Whatpm::LangTag;
396     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
397     my %opt = @_;
398     my $type = 'LangTag:'.$opt{type};
399     $type .= ':' . $opt{subtag} if defined $opt{subtag};
400     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
401     level => $opt{level});
402     });
403 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
404 wakaba 1.6
405     ## TODO: testdata
406 wakaba 1.1 }; # $HTMLLanguageTagAttrChecker
407    
408     ## "A valid media query [MQ]"
409     my $HTMLMQAttrChecker = sub {
410     my ($self, $attr) = @_;
411     $self->{onerror}->(node => $attr, level => 'unsupported',
412     type => 'media query');
413     ## ISSUE: What is "a valid media query"?
414     }; # $HTMLMQAttrChecker
415    
416     my $HTMLEventHandlerAttrChecker = sub {
417     my ($self, $attr) = @_;
418     $self->{onerror}->(node => $attr, level => 'unsupported',
419     type => 'event handler');
420     ## TODO: MUST contain valid ECMAScript code matching the
421     ## ECMAScript |FunctionBody| production. [ECMA262]
422     ## ISSUE: MUST be ES3? E4X? ES4? JS1.x?
423     ## ISSUE: Automatic semicolon insertion does not apply?
424     ## ISSUE: Other script languages?
425     }; # $HTMLEventHandlerAttrChecker
426    
427     my $HTMLUsemapAttrChecker = sub {
428     my ($self, $attr) = @_;
429     ## MUST be a valid hashed ID reference to a |map| element
430     my $value = $attr->value;
431     if ($value =~ s/^#//) {
432     ## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.)
433     push @{$self->{usemap}}, [$value => $attr];
434     } else {
435     $self->{onerror}->(node => $attr, type => '#idref:syntax error');
436     }
437     ## NOTE: Space characters in hashed ID references are conforming.
438     ## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported
439     }; # $HTMLUsemapAttrChecker
440    
441     my $HTMLTargetAttrChecker = sub {
442     my ($self, $attr) = @_;
443     my $value = $attr->value;
444     if ($value =~ /^_/) {
445     $value = lc $value; ## ISSUE: ASCII case-insentitive?
446     unless ({
447     _self => 1, _parent => 1, _top => 1,
448     }->{$value}) {
449     $self->{onerror}->(node => $attr,
450     type => 'reserved browsing context name');
451     }
452     } else {
453 wakaba 1.29 ## NOTE: An empty string is a valid browsing context name (same as _self).
454 wakaba 1.1 }
455     }; # $HTMLTargetAttrChecker
456    
457 wakaba 1.23 my $HTMLSelectorsAttrChecker = sub {
458     my ($self, $attr) = @_;
459    
460     ## ISSUE: Namespace resolution?
461    
462     my $value = $attr->value;
463    
464     require Whatpm::CSS::SelectorsParser;
465     my $p = Whatpm::CSS::SelectorsParser->new;
466     $p->{pseudo_class}->{$_} = 1 for qw/
467     active checked disabled empty enabled first-child first-of-type
468     focus hover indeterminate last-child last-of-type link only-child
469     only-of-type root target visited
470     lang nth-child nth-last-child nth-of-type nth-last-of-type not
471     -manakai-contains -manakai-current
472     /;
473    
474     $p->{pseudo_element}->{$_} = 1 for qw/
475     after before first-letter first-line
476     /;
477    
478     $p->{must_level} = $self->{must_level};
479     $p->{onerror} = sub {
480     my %opt = @_;
481     $opt{type} = 'selectors:'.$opt{type};
482     $self->{onerror}->(%opt, node => $attr);
483     };
484     $p->parse_string ($value);
485     }; # $HTMLSelectorsAttrChecker
486    
487 wakaba 1.1 my $HTMLAttrChecker = {
488     id => sub {
489     ## NOTE: |map| has its own variant of |id=""| checker
490     my ($self, $attr) = @_;
491     my $value = $attr->value;
492     if (length $value > 0) {
493     if ($self->{id}->{$value}) {
494     $self->{onerror}->(node => $attr, type => 'duplicate ID');
495     push @{$self->{id}->{$value}}, $attr;
496     } else {
497     $self->{id}->{$value} = [$attr];
498     }
499     if ($value =~ /[\x09-\x0D\x20]/) {
500     $self->{onerror}->(node => $attr, type => 'space in ID');
501     }
502     } else {
503     ## NOTE: MUST contain at least one character
504     $self->{onerror}->(node => $attr, type => 'empty attribute value');
505     }
506     },
507     title => sub {}, ## NOTE: No conformance creteria
508     lang => sub {
509     my ($self, $attr) = @_;
510 wakaba 1.6 my $value = $attr->value;
511     if ($value eq '') {
512     #
513     } else {
514     require Whatpm::LangTag;
515     Whatpm::LangTag->check_rfc3066_language_tag ($value, sub {
516     my %opt = @_;
517     my $type = 'LangTag:'.$opt{type};
518     $type .= ':' . $opt{subtag} if defined $opt{subtag};
519     $self->{onerror}->(node => $attr, type => $type, value => $opt{value},
520     level => $opt{level});
521     });
522     }
523 wakaba 1.1 ## ISSUE: RFC 4646 (3066bis)?
524     unless ($attr->owner_document->manakai_is_html) {
525     $self->{onerror}->(node => $attr, type => 'in XML:lang');
526     }
527 wakaba 1.6
528     ## TODO: test data
529 wakaba 1.1 },
530     dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}),
531     class => sub {
532     my ($self, $attr) = @_;
533     my %word;
534     for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) {
535     unless ($word{$word}) {
536     $word{$word} = 1;
537     push @{$self->{return}->{class}->{$word}||=[]}, $attr;
538     } else {
539     $self->{onerror}->(node => $attr, type => 'duplicate token:'.$word);
540     }
541     }
542     },
543     contextmenu => sub {
544     my ($self, $attr) = @_;
545     my $value = $attr->value;
546     push @{$self->{contextmenu}}, [$value => $attr];
547     ## ISSUE: "The value must be the ID of a menu element in the DOM."
548     ## What is "in the DOM"? A menu Element node that is not part
549     ## of the Document tree is in the DOM? A menu Element node that
550     ## belong to another Document tree is in the DOM?
551     },
552     irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'),
553 wakaba 1.8 tabindex => $HTMLIntegerAttrChecker
554     ## TODO: ref, template, registrationmark
555 wakaba 1.1 };
556    
557     for (qw/
558     onabort onbeforeunload onblur onchange onclick oncontextmenu
559     ondblclick ondrag ondragend ondragenter ondragleave ondragover
560     ondragstart ondrop onerror onfocus onkeydown onkeypress
561     onkeyup onload onmessage onmousedown onmousemove onmouseout
562     onmouseover onmouseup onmousewheel onresize onscroll onselect
563     onsubmit onunload
564     /) {
565     $HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker;
566     }
567    
568     my $GetHTMLAttrsChecker = sub {
569     my $element_specific_checker = shift;
570     return sub {
571 wakaba 1.40 my ($self, $item, $element_state) = @_;
572     for my $attr (@{$item->{node}->attributes}) {
573 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
574     $attr_ns = '' unless defined $attr_ns;
575     my $attr_ln = $attr->manakai_local_name;
576     my $checker;
577     if ($attr_ns eq '') {
578     $checker = $element_specific_checker->{$attr_ln}
579 wakaba 1.40 || $HTMLAttrChecker->{$attr_ln};
580 wakaba 1.1 }
581     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
582 wakaba 1.40 || $AttrChecker->{$attr_ns}->{''};
583 wakaba 1.1 if ($checker) {
584 wakaba 1.40 $checker->($self, $attr, $item);
585 wakaba 1.1 } else {
586     $self->{onerror}->(node => $attr, level => 'unsupported',
587     type => 'attribute');
588     ## ISSUE: No comformance createria for unknown attributes in the spec
589     }
590     }
591     };
592     }; # $GetHTMLAttrsChecker
593    
594 wakaba 1.40 my %HTMLChecker = (
595     %Whatpm::ContentChecker::AnyChecker,
596     check_attrs => $GetHTMLAttrsChecker->({}),
597     );
598    
599     my %HTMLEmptyChecker = (
600     %HTMLChecker,
601     check_child_element => sub {
602     my ($self, $item, $child_el, $child_nsuri, $child_ln,
603     $child_is_transparent, $element_state) = @_;
604     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
605     $self->{onerror}->(node => $child_el,
606     type => 'element not allowed:minus',
607     level => $self->{must_level});
608     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
609     #
610     } else {
611     $self->{onerror}->(node => $child_el,
612     type => 'element not allowed:empty',
613     level => $self->{must_level});
614     }
615     },
616     check_child_text => sub {
617     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
618     if ($has_significant) {
619     $self->{onerror}->(node => $child_node,
620     type => 'character not allowed:empty',
621     level => $self->{must_level});
622     }
623     },
624     );
625    
626     my %HTMLTextChecker = (
627     %HTMLChecker,
628     check_child_element => sub {
629     my ($self, $item, $child_el, $child_nsuri, $child_ln,
630     $child_is_transparent, $element_state) = @_;
631     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
632     $self->{onerror}->(node => $child_el,
633     type => 'element not allowed:minus',
634     level => $self->{must_level});
635     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
636     #
637     } else {
638     $self->{onerror}->(node => $child_el, type => 'element not allowed');
639     }
640     },
641     );
642    
643     my %HTMLProseContentChecker = (
644     %HTMLChecker,
645     check_child_element => sub {
646     my ($self, $item, $child_el, $child_nsuri, $child_ln,
647     $child_is_transparent, $element_state) = @_;
648     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
649     $self->{onerror}->(node => $child_el,
650     type => 'element not allowed:minus',
651     level => $self->{must_level});
652     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
653     #
654     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
655     if ($element_state->{has_non_style} or
656     not $child_el->has_attribute_ns (undef, 'scoped')) {
657     $self->{onerror}->(node => $child_el,
658     type => 'element not allowed:prose style',
659     level => $self->{must_level});
660     }
661     } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
662 wakaba 1.43 $element_state->{has_non_style} = 1 unless $child_is_transparent;
663 wakaba 1.40 } else {
664     $element_state->{has_non_style} = 1;
665     $self->{onerror}->(node => $child_el,
666     type => 'element not allowed:prose',
667     level => $self->{must_level})
668     }
669     },
670     check_child_text => sub {
671     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
672     if ($has_significant) {
673     $element_state->{has_non_style} = 1;
674     }
675     },
676     check_end => sub {
677     my ($self, $item, $element_state) = @_;
678     if ($element_state->{has_significant}) {
679 wakaba 1.46 $item->{real_parent_state}->{has_significant} = 1;
680 wakaba 1.40 } elsif ($item->{transparent}) {
681     #
682     } else {
683     $self->{onerror}->(node => $item->{node},
684     level => $self->{should_level},
685     type => 'no significant content');
686     }
687     },
688     );
689    
690     my %HTMLPhrasingContentChecker = (
691     %HTMLChecker,
692     check_child_element => sub {
693     my ($self, $item, $child_el, $child_nsuri, $child_ln,
694     $child_is_transparent, $element_state) = @_;
695     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
696     $self->{onerror}->(node => $child_el,
697     type => 'element not allowed:minus',
698     level => $self->{must_level});
699     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
700     #
701     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
702     #
703     } else {
704     $self->{onerror}->(node => $child_el,
705     type => 'element not allowed:phrasing',
706     level => $self->{must_level});
707     }
708     },
709     check_end => $HTMLProseContentChecker{check_end},
710     ## NOTE: The definition for |li| assumes that the only differences
711     ## between prose and phrasing content checkers are |check_child_element|
712     ## and |check_child_text|.
713     );
714    
715     my %HTMLTransparentChecker = %HTMLProseContentChecker;
716     ## ISSUE: Significant content rule should be applied to transparent element
717 wakaba 1.46 ## with parent?
718 wakaba 1.40
719 wakaba 1.1 our $Element;
720     our $ElementDefault;
721    
722     $Element->{$HTML_NS}->{''} = {
723 wakaba 1.40 %HTMLChecker,
724     check_start => $ElementDefault->{check_start},
725 wakaba 1.1 };
726    
727     $Element->{$HTML_NS}->{html} = {
728     is_root => 1,
729 wakaba 1.40 check_attrs => $GetHTMLAttrsChecker->({
730 wakaba 1.16 manifest => $HTMLURIAttrChecker,
731 wakaba 1.1 xmlns => sub {
732     my ($self, $attr) = @_;
733     my $value = $attr->value;
734     unless ($value eq $HTML_NS) {
735     $self->{onerror}->(node => $attr, type => 'invalid attribute value');
736     }
737     unless ($attr->owner_document->manakai_is_html) {
738     $self->{onerror}->(node => $attr, type => 'in XML:xmlns');
739     ## TODO: Test
740     }
741     },
742     }),
743 wakaba 1.40 check_start => sub {
744     my ($self, $item, $element_state) = @_;
745     $element_state->{phase} = 'before head';
746     },
747     check_child_element => sub {
748     my ($self, $item, $child_el, $child_nsuri, $child_ln,
749     $child_is_transparent, $element_state) = @_;
750     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
751     $self->{onerror}->(node => $child_el,
752     type => 'element not allowed:minus',
753     level => $self->{must_level});
754     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
755     #
756     } elsif ($element_state->{phase} eq 'before head') {
757     if ($child_nsuri eq $HTML_NS and $child_ln eq 'head') {
758     $element_state->{phase} = 'after head';
759     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
760     $self->{onerror}->(node => $child_el,
761     type => 'ps element missing:head');
762     $element_state->{phase} = 'after body';
763     } else {
764     $self->{onerror}->(node => $child_el,
765     type => 'element not allowed');
766     }
767     } elsif ($element_state->{phase} eq 'after head') {
768     if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') {
769     $element_state->{phase} = 'after body';
770     } else {
771     $self->{onerror}->(node => $child_el,
772     type => 'element not allowed');
773     }
774     } elsif ($element_state->{phase} eq 'after body') {
775     $self->{onerror}->(node => $child_el,
776     type => 'element not allowed');
777     } else {
778     die "check_child_element: Bad |html| phase: $element_state->{phase}";
779     }
780     },
781     check_child_text => sub {
782     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
783     if ($has_significant) {
784     $self->{onerror}->(node => $child_node,
785     type => 'character not allowed');
786     }
787     },
788     check_end => sub {
789     my ($self, $item, $element_state) = @_;
790     if ($element_state->{phase} eq 'after body') {
791     #
792     } elsif ($element_state->{phase} eq 'before head') {
793     $self->{onerror}->(node => $item->{node},
794     type => 'child element missing:head');
795     $self->{onerror}->(node => $item->{node},
796     type => 'child element missing:body');
797     } elsif ($element_state->{phase} eq 'after head') {
798     $self->{onerror}->(node => $item->{node},
799     type => 'child element missing:body');
800     } else {
801     die "check_end: Bad |html| phase: $element_state->{phase}";
802     }
803 wakaba 1.1
804 wakaba 1.40 $HTMLChecker{check_end}->(@_);
805     },
806     };
807 wakaba 1.25
808 wakaba 1.40 $Element->{$HTML_NS}->{head} = {
809     check_attrs => $GetHTMLAttrsChecker->({}),
810     check_child_element => sub {
811     my ($self, $item, $child_el, $child_nsuri, $child_ln,
812     $child_is_transparent, $element_state) = @_;
813     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
814     $self->{onerror}->(node => $child_el,
815     type => 'element not allowed:minus',
816     level => $self->{must_level});
817     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
818     #
819     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') {
820     unless ($element_state->{has_title}) {
821     $element_state->{has_title} = 1;
822     } else {
823     $self->{onerror}->(node => $child_el,
824     type => 'element not allowed:head title',
825     level => $self->{must_level});
826     }
827     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
828     if ($child_el->has_attribute_ns (undef, 'scoped')) {
829     $self->{onerror}->(node => $child_el,
830     type => 'element not allowed:head style',
831     level => $self->{must_level});
832 wakaba 1.1 }
833 wakaba 1.40 } elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) {
834     #
835    
836     ## NOTE: |meta| is a metadata content. However, strictly speaking,
837     ## a |meta| element with none of |charset|, |name|,
838     ## or |http-equiv| attribute is not allowed. It is non-conforming
839     ## anyway.
840     } else {
841     $self->{onerror}->(node => $child_el,
842     type => 'element not allowed:metadata',
843     level => $self->{must_level});
844     }
845     $element_state->{in_head_original} = $self->{flag}->{in_head};
846     $self->{flag}->{in_head} = 1;
847     },
848     check_child_text => sub {
849     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
850     if ($has_significant) {
851     $self->{onerror}->(node => $child_node, type => 'character not allowed');
852 wakaba 1.1 }
853 wakaba 1.40 },
854     check_end => sub {
855     my ($self, $item, $element_state) = @_;
856     unless ($element_state->{has_title}) {
857     $self->{onerror}->(node => $item->{node},
858     type => 'child element missing:title');
859 wakaba 1.1 }
860 wakaba 1.40 $self->{flag}->{in_head} = $element_state->{in_head_original};
861 wakaba 1.1
862 wakaba 1.40 $HTMLChecker{check_end}->(@_);
863 wakaba 1.1 },
864     };
865    
866 wakaba 1.40 $Element->{$HTML_NS}->{title} = {
867     %HTMLTextChecker,
868     };
869 wakaba 1.1
870 wakaba 1.40 $Element->{$HTML_NS}->{base} = {
871     %HTMLEmptyChecker,
872     check_attrs => sub {
873     my ($self, $item, $element_state) = @_;
874 wakaba 1.1
875 wakaba 1.40 if ($self->{has_base}) {
876     $self->{onerror}->(node => $item->{node},
877     type => 'element not allowed:base');
878     } else {
879     $self->{has_base} = 1;
880 wakaba 1.29 }
881    
882 wakaba 1.40 my $has_href = $item->{node}->has_attribute_ns (undef, 'href');
883     my $has_target = $item->{node}->has_attribute_ns (undef, 'target');
884 wakaba 1.14
885     if ($self->{has_uri_attr} and $has_href) {
886 wakaba 1.4 ## ISSUE: Are these examples conforming?
887     ## <head profile="a b c"><base href> (except for |profile|'s
888     ## non-conformance)
889     ## <title xml:base="relative"/><base href/> (maybe it should be)
890     ## <unknown xmlns="relative"/><base href/> (assuming that
891     ## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though))
892     ## <style>@import 'relative';</style><base href>
893     ## <script>location.href = 'relative';</script><base href>
894 wakaba 1.14 ## NOTE: <html manifest=".."><head><base href=""/> is conforming as
895     ## an exception.
896 wakaba 1.40 $self->{onerror}->(node => $item->{node},
897 wakaba 1.4 type => 'basehref after URI attribute');
898     }
899 wakaba 1.14 if ($self->{has_hyperlink_element} and $has_target) {
900 wakaba 1.4 ## ISSUE: Are these examples conforming?
901     ## <head><title xlink:href=""/><base target="name"/></head>
902     ## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/>
903     ## (assuming that |xbl:xbl| is allowed before |base|)
904     ## NOTE: These are non-conformant anyway because of |head|'s content model:
905     ## <link href=""/><base target="name"/>
906     ## <link rel=unknown href=""><base target=name>
907 wakaba 1.40 $self->{onerror}->(node => $item->{node},
908 wakaba 1.4 type => 'basetarget after hyperlink');
909     }
910    
911 wakaba 1.14 if (not $has_href and not $has_target) {
912 wakaba 1.40 $self->{onerror}->(node => $item->{node},
913 wakaba 1.14 type => 'attribute missing:href|target');
914     }
915    
916 wakaba 1.4 return $GetHTMLAttrsChecker->({
917     href => $HTMLURIAttrChecker,
918     target => $HTMLTargetAttrChecker,
919 wakaba 1.40 })->($self, $item, $element_state);
920 wakaba 1.4 },
921 wakaba 1.1 };
922    
923     $Element->{$HTML_NS}->{link} = {
924 wakaba 1.40 %HTMLEmptyChecker,
925     check_attrs => sub {
926     my ($self, $item, $element_state) = @_;
927 wakaba 1.1 $GetHTMLAttrsChecker->({
928     href => $HTMLURIAttrChecker,
929 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) },
930 wakaba 1.1 media => $HTMLMQAttrChecker,
931     hreflang => $HTMLLanguageTagAttrChecker,
932     type => $HTMLIMTAttrChecker,
933     ## NOTE: Though |title| has special semantics,
934     ## syntactically same as the |title| as global attribute.
935 wakaba 1.40 })->($self, $item, $element_state);
936     if ($item->{node}->has_attribute_ns (undef, 'href')) {
937     $self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type};
938 wakaba 1.4 } else {
939 wakaba 1.40 $self->{onerror}->(node => $item->{node},
940 wakaba 1.1 type => 'attribute missing:href');
941     }
942 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'rel')) {
943     $self->{onerror}->(node => $item->{node},
944 wakaba 1.1 type => 'attribute missing:rel');
945     }
946     },
947     };
948    
949     $Element->{$HTML_NS}->{meta} = {
950 wakaba 1.40 %HTMLEmptyChecker,
951     check_attrs => sub {
952     my ($self, $item, $element_state) = @_;
953 wakaba 1.1 my $name_attr;
954     my $http_equiv_attr;
955     my $charset_attr;
956     my $content_attr;
957 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
958 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
959     $attr_ns = '' unless defined $attr_ns;
960     my $attr_ln = $attr->manakai_local_name;
961     my $checker;
962     if ($attr_ns eq '') {
963     if ($attr_ln eq 'content') {
964     $content_attr = $attr;
965     $checker = 1;
966     } elsif ($attr_ln eq 'name') {
967     $name_attr = $attr;
968     $checker = 1;
969     } elsif ($attr_ln eq 'http-equiv') {
970     $http_equiv_attr = $attr;
971     $checker = 1;
972     } elsif ($attr_ln eq 'charset') {
973     $charset_attr = $attr;
974     $checker = 1;
975     } else {
976     $checker = $HTMLAttrChecker->{$attr_ln}
977     || $AttrChecker->{$attr_ns}->{$attr_ln}
978     || $AttrChecker->{$attr_ns}->{''};
979     }
980     } else {
981     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
982     || $AttrChecker->{$attr_ns}->{''};
983     }
984     if ($checker) {
985     $checker->($self, $attr) if ref $checker;
986     } else {
987     $self->{onerror}->(node => $attr, level => 'unsupported',
988     type => 'attribute');
989     ## ISSUE: No comformance createria for unknown attributes in the spec
990     }
991     }
992    
993     if (defined $name_attr) {
994     if (defined $http_equiv_attr) {
995     $self->{onerror}->(node => $http_equiv_attr,
996     type => 'attribute not allowed');
997     } elsif (defined $charset_attr) {
998     $self->{onerror}->(node => $charset_attr,
999     type => 'attribute not allowed');
1000     }
1001     my $metadata_name = $name_attr->value;
1002     my $metadata_value;
1003     if (defined $content_attr) {
1004     $metadata_value = $content_attr->value;
1005     } else {
1006 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1007 wakaba 1.1 type => 'attribute missing:content');
1008     $metadata_value = '';
1009     }
1010     } elsif (defined $http_equiv_attr) {
1011     if (defined $charset_attr) {
1012     $self->{onerror}->(node => $charset_attr,
1013     type => 'attribute not allowed');
1014     }
1015     unless (defined $content_attr) {
1016 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1017 wakaba 1.1 type => 'attribute missing:content');
1018     }
1019     } elsif (defined $charset_attr) {
1020     if (defined $content_attr) {
1021     $self->{onerror}->(node => $content_attr,
1022     type => 'attribute not allowed');
1023     }
1024     } else {
1025     if (defined $content_attr) {
1026     $self->{onerror}->(node => $content_attr,
1027     type => 'attribute not allowed');
1028 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1029 wakaba 1.1 type => 'attribute missing:name|http-equiv');
1030     } else {
1031 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1032 wakaba 1.1 type => 'attribute missing:name|http-equiv|charset');
1033     }
1034     }
1035    
1036 wakaba 1.32 my $check_charset_decl = sub () {
1037 wakaba 1.40 my $parent = $item->{node}->manakai_parent_element;
1038 wakaba 1.29 if ($parent and $parent eq $parent->owner_document->manakai_head) {
1039     for my $el (@{$parent->child_nodes}) {
1040     next unless $el->node_type == 1; # ELEMENT_NODE
1041 wakaba 1.40 unless ($el eq $item->{node}) {
1042 wakaba 1.29 ## NOTE: Not the first child element.
1043 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1044 wakaba 1.32 type => 'element not allowed:meta charset',
1045     level => $self->{must_level});
1046 wakaba 1.29 }
1047     last;
1048     ## NOTE: Entity references are not supported.
1049     }
1050     } else {
1051 wakaba 1.40 $self->{onerror}->(node => $item->{node},
1052 wakaba 1.32 type => 'element not allowed:meta charset',
1053     level => $self->{must_level});
1054 wakaba 1.29 }
1055    
1056 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
1057     $self->{onerror}->(node => $item->{node},
1058 wakaba 1.32 type => 'in XML:charset',
1059     level => $self->{must_level});
1060 wakaba 1.1 }
1061 wakaba 1.32 }; # $check_charset_decl
1062 wakaba 1.21
1063 wakaba 1.32 my $check_charset = sub ($$) {
1064     my ($attr, $charset_value) = @_;
1065 wakaba 1.21 ## NOTE: Though the case-sensitivility of |charset| attribute value
1066     ## is not explicitly spelled in the HTML5 spec, the Character Set
1067     ## registry of IANA, which is referenced from HTML5 spec, says that
1068     ## charset name is case-insensitive.
1069     $charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive.
1070    
1071     require Message::Charset::Info;
1072     my $charset = $Message::Charset::Info::IANACharset->{$charset_value};
1073 wakaba 1.40 my $ic = $item->{node}->owner_document->input_encoding;
1074 wakaba 1.21 if (defined $ic) {
1075     ## TODO: Test for this case
1076     my $ic_charset = $Message::Charset::Info::IANACharset->{$ic};
1077     if ($charset ne $ic_charset) {
1078 wakaba 1.32 $self->{onerror}->(node => $attr,
1079 wakaba 1.21 type => 'mismatched charset name:'.$ic.
1080 wakaba 1.32 ':'.$charset_value, ## TODO: This should be a |value| value.
1081     level => $self->{must_level});
1082 wakaba 1.21 }
1083     } else {
1084     ## NOTE: MUST, but not checkable, since the document is not originally
1085     ## in serialized form (or the parser does not preserve the input
1086     ## encoding information).
1087 wakaba 1.32 $self->{onerror}->(node => $attr,
1088     type => 'mismatched charset name::'.$charset_value, ## TODO: |value|
1089 wakaba 1.21 level => 'unsupported');
1090     }
1091    
1092     ## ISSUE: What is "valid character encoding name"? Syntactically valid?
1093     ## Syntactically valid and registered? What about x-charset names?
1094     unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name
1095     ($charset_value)) {
1096 wakaba 1.32 $self->{onerror}->(node => $attr,
1097     type => 'charset:syntax error:'.$charset_value, ## TODO
1098     level => $self->{must_level});
1099 wakaba 1.21 }
1100    
1101     if ($charset) {
1102     ## ISSUE: What is "the preferred name for that encoding" (for a charset
1103     ## with no "preferred MIME name" label)?
1104     my $charset_status = $charset->{iana_names}->{$charset_value} || 0;
1105     if (($charset_status &
1106     Message::Charset::Info::PREFERRED_CHARSET_NAME ())
1107     != Message::Charset::Info::PREFERRED_CHARSET_NAME ()) {
1108 wakaba 1.32 $self->{onerror}->(node => $attr,
1109 wakaba 1.21 type => 'charset:not preferred:'.
1110 wakaba 1.32 $charset_value, ## TODO
1111     level => $self->{must_level});
1112 wakaba 1.21 }
1113     if (($charset_status &
1114     Message::Charset::Info::REGISTERED_CHARSET_NAME ())
1115     != Message::Charset::Info::REGISTERED_CHARSET_NAME ()) {
1116     if ($charset_value =~ /^x-/) {
1117 wakaba 1.32 $self->{onerror}->(node => $attr,
1118     type => 'charset:private:'.$charset_value, ## TODO
1119 wakaba 1.21 level => $self->{good_level});
1120     } else {
1121 wakaba 1.32 $self->{onerror}->(node => $attr,
1122 wakaba 1.21 type => 'charset:not registered:'.
1123 wakaba 1.32 $charset_value, ## TODO
1124 wakaba 1.21 level => $self->{good_level});
1125     }
1126     }
1127     } elsif ($charset_value =~ /^x-/) {
1128 wakaba 1.32 $self->{onerror}->(node => $attr,
1129     type => 'charset:private:'.$charset_value, ## TODO
1130 wakaba 1.21 level => $self->{good_level});
1131     } else {
1132 wakaba 1.32 $self->{onerror}->(node => $attr,
1133     type => 'charset:not registered:'.$charset_value, ## TODO
1134 wakaba 1.21 level => $self->{good_level});
1135     }
1136    
1137 wakaba 1.32 if ($attr->get_user_data ('manakai_has_reference')) {
1138     $self->{onerror}->(node => $attr,
1139 wakaba 1.22 type => 'character reference in charset',
1140     level => $self->{must_level});
1141     }
1142 wakaba 1.32 }; # $check_charset
1143    
1144     ## TODO: metadata conformance
1145    
1146     ## TODO: pragma conformance
1147     if (defined $http_equiv_attr) { ## An enumerated attribute
1148     my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case?
1149     if ({
1150     'refresh' => 1,
1151     'default-style' => 1,
1152     }->{$keyword}) {
1153     #
1154 wakaba 1.33
1155     ## TODO: More than one occurence is a MUST-level error (revision 1180).
1156 wakaba 1.32 } elsif ($keyword eq 'content-type') {
1157 wakaba 1.33 ## ISSUE: Though it is renamed as "Encoding declaration" state in rev
1158     ## 1221, there are still many occurence of "Content-Type" state in
1159     ## the spec.
1160    
1161 wakaba 1.32 $check_charset_decl->();
1162     if ($content_attr) {
1163     my $content = $content_attr->value;
1164     if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) {
1165     $check_charset->($content_attr, $1);
1166     } else {
1167     $self->{onerror}->(node => $content_attr,
1168     type => 'meta content-type syntax error',
1169     level => $self->{must_level});
1170     }
1171     }
1172     } else {
1173     $self->{onerror}->(node => $http_equiv_attr,
1174     type => 'enumerated:invalid');
1175     }
1176     }
1177    
1178     if (defined $charset_attr) {
1179     $check_charset_decl->();
1180     $check_charset->($charset_attr, $charset_attr->value);
1181 wakaba 1.1 }
1182     },
1183     };
1184    
1185     $Element->{$HTML_NS}->{style} = {
1186 wakaba 1.40 %HTMLChecker,
1187     check_attrs => $GetHTMLAttrsChecker->({
1188 wakaba 1.1 type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language
1189     media => $HTMLMQAttrChecker,
1190     scoped => $GetHTMLBooleanAttrChecker->('scoped'),
1191     ## NOTE: |title| has special semantics for |style|s, but is syntactically
1192     ## not different
1193     }),
1194 wakaba 1.40 check_start => sub {
1195     my ($self, $item, $element_state) = @_;
1196    
1197 wakaba 1.27 ## NOTE: |html:style| itself has no conformance creteria on content model.
1198 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
1199 wakaba 1.27 if (not defined $type or
1200     $type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) {
1201 wakaba 1.40 $element_state->{allow_element} = 0;
1202     $element_state->{style_type} = 'text/css';
1203     } else {
1204     $element_state->{allow_element} = 1; # unknown
1205     $element_state->{style_type} = $type; ## TODO: $type normalization
1206     }
1207     },
1208     check_child_element => sub {
1209     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1210     $child_is_transparent, $element_state) = @_;
1211     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1212     $self->{onerror}->(node => $child_el,
1213     type => 'element not allowed:minus',
1214     level => $self->{must_level});
1215     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1216     #
1217     } elsif ($element_state->{allow_element}) {
1218     #
1219     } else {
1220     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1221     }
1222     },
1223     check_child_text => sub {
1224     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1225     $element_state->{text} .= $child_node->text_content;
1226     },
1227     check_end => sub {
1228     my ($self, $item, $element_state) = @_;
1229     if ($element_state->{style_type} eq 'text/css') {
1230     $self->{onsubdoc}->({s => $element_state->{text},
1231     container_node => $item->{node},
1232 wakaba 1.28 media_type => 'text/css', is_char_string => 1});
1233 wakaba 1.27 } else {
1234 wakaba 1.40 $self->{onerror}->(node => $item->{node}, level => 'unsupported',
1235     type => 'style:'.$element_state->{style_type});
1236 wakaba 1.27 }
1237 wakaba 1.40
1238     $HTMLChecker{check_end}->(@_);
1239 wakaba 1.1 },
1240     };
1241 wakaba 1.25 ## ISSUE: Relationship to significant content check?
1242 wakaba 1.1
1243     $Element->{$HTML_NS}->{body} = {
1244 wakaba 1.40 %HTMLProseContentChecker,
1245 wakaba 1.1 };
1246    
1247     $Element->{$HTML_NS}->{section} = {
1248 wakaba 1.40 %HTMLProseContentChecker,
1249 wakaba 1.1 };
1250    
1251     $Element->{$HTML_NS}->{nav} = {
1252 wakaba 1.40 %HTMLProseContentChecker,
1253 wakaba 1.1 };
1254    
1255     $Element->{$HTML_NS}->{article} = {
1256 wakaba 1.40 %HTMLProseContentChecker,
1257 wakaba 1.1 };
1258    
1259     $Element->{$HTML_NS}->{blockquote} = {
1260 wakaba 1.40 %HTMLProseContentChecker,
1261     check_attrs => $GetHTMLAttrsChecker->({
1262 wakaba 1.1 cite => $HTMLURIAttrChecker,
1263     }),
1264     };
1265    
1266     $Element->{$HTML_NS}->{aside} = {
1267 wakaba 1.40 %HTMLProseContentChecker,
1268 wakaba 1.1 };
1269    
1270     $Element->{$HTML_NS}->{h1} = {
1271 wakaba 1.40 %HTMLPhrasingContentChecker,
1272     check_start => sub {
1273     my ($self, $item, $element_state) = @_;
1274     $self->{flag}->{has_hn} = 1;
1275 wakaba 1.1 },
1276     };
1277    
1278 wakaba 1.40 $Element->{$HTML_NS}->{h2} = {%{$Element->{$HTML_NS}->{h1}}};
1279 wakaba 1.1
1280 wakaba 1.40 $Element->{$HTML_NS}->{h3} = {%{$Element->{$HTML_NS}->{h1}}};
1281 wakaba 1.1
1282 wakaba 1.40 $Element->{$HTML_NS}->{h4} = {%{$Element->{$HTML_NS}->{h1}}};
1283 wakaba 1.1
1284 wakaba 1.40 $Element->{$HTML_NS}->{h5} = {%{$Element->{$HTML_NS}->{h1}}};
1285 wakaba 1.1
1286 wakaba 1.40 $Element->{$HTML_NS}->{h6} = {%{$Element->{$HTML_NS}->{h1}}};
1287 wakaba 1.1
1288 wakaba 1.29 ## TODO: Explicit sectioning is "encouraged".
1289    
1290 wakaba 1.1 $Element->{$HTML_NS}->{header} = {
1291 wakaba 1.40 %HTMLProseContentChecker,
1292     check_start => sub {
1293     my ($self, $item, $element_state) = @_;
1294     $self->_add_minus_elements ($element_state,
1295     {$HTML_NS => {qw/header 1 footer 1/}},
1296     $HTMLSectioningContent);
1297     $element_state->{has_hn_original} = $self->{flag}->{has_hn};
1298     $self->{flag}->{has_hn} = 0;
1299     },
1300     check_end => sub {
1301     my ($self, $item, $element_state) = @_;
1302     $self->_remove_minus_elements ($element_state);
1303     unless ($self->{flag}->{has_hn}) {
1304     $self->{onerror}->(node => $item->{node},
1305     type => 'element missing:hn');
1306     }
1307     $self->{flag}->{has_hn} ||= $element_state->{has_hn_original};
1308 wakaba 1.1
1309 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1310 wakaba 1.1 },
1311 wakaba 1.40 ## ISSUE: <header><del><h1>...</h1></del></header> is conforming?
1312 wakaba 1.1 };
1313    
1314     $Element->{$HTML_NS}->{footer} = {
1315 wakaba 1.40 %HTMLProseContentChecker,
1316     check_start => sub {
1317     my ($self, $item, $element_state) = @_;
1318     $self->_add_minus_elements ($element_state,
1319     {$HTML_NS => {footer => 1}},
1320     $HTMLSectioningContent, $HTMLHeadingContent);
1321     },
1322     check_end => sub {
1323     my ($self, $item, $element_state) = @_;
1324     $self->_remove_minus_elements ($element_state);
1325 wakaba 1.1
1326 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1327 wakaba 1.1 },
1328     };
1329    
1330     $Element->{$HTML_NS}->{address} = {
1331 wakaba 1.40 %HTMLProseContentChecker,
1332     check_start => sub {
1333     my ($self, $item, $element_state) = @_;
1334     $self->_add_minus_elements ($element_state,
1335     {$HTML_NS => {footer => 1, address => 1}},
1336     $HTMLSectioningContent, $HTMLHeadingContent);
1337     },
1338     check_end => sub {
1339     my ($self, $item, $element_state) = @_;
1340     $self->_remove_minus_elements ($element_state);
1341 wakaba 1.29
1342 wakaba 1.40 $HTMLProseContentChecker{check_end}->(@_);
1343 wakaba 1.29 },
1344 wakaba 1.1 };
1345    
1346     $Element->{$HTML_NS}->{p} = {
1347 wakaba 1.40 %HTMLPhrasingContentChecker,
1348 wakaba 1.1 };
1349    
1350     $Element->{$HTML_NS}->{hr} = {
1351 wakaba 1.40 %HTMLEmptyChecker,
1352 wakaba 1.1 };
1353    
1354     $Element->{$HTML_NS}->{br} = {
1355 wakaba 1.40 %HTMLEmptyChecker,
1356 wakaba 1.29 ## NOTE: Blank line MUST NOT be used for presentation purpose.
1357     ## (This requirement is semantic so that we cannot check.)
1358 wakaba 1.1 };
1359    
1360     $Element->{$HTML_NS}->{dialog} = {
1361 wakaba 1.40 %HTMLChecker,
1362     check_start => sub {
1363     my ($self, $item, $element_state) = @_;
1364     $element_state->{phase} = 'before dt';
1365     },
1366     check_child_element => sub {
1367     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1368     $child_is_transparent, $element_state) = @_;
1369     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1370     $self->{onerror}->(node => $child_el,
1371     type => 'element not allowed:minus',
1372     level => $self->{must_level});
1373     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1374     #
1375     } elsif ($element_state->{phase} eq 'before dt') {
1376     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1377     $element_state->{phase} = 'before dd';
1378     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1379     $self->{onerror}
1380     ->(node => $child_el, type => 'ps element missing:dt');
1381     $element_state->{phase} = 'before dt';
1382     } else {
1383     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1384     }
1385     } elsif ($element_state->{phase} eq 'before dd') {
1386     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1387     $element_state->{phase} = 'before dt';
1388     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1389     $self->{onerror}
1390     ->(node => $child_el, type => 'ps element missing:dd');
1391     $element_state->{phase} = 'before dd';
1392     } else {
1393     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1394 wakaba 1.1 }
1395 wakaba 1.40 } else {
1396     die "check_child_element: Bad |dialog| phase: $element_state->{phase}";
1397     }
1398     },
1399     check_child_text => sub {
1400     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1401     if ($has_significant) {
1402     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1403 wakaba 1.1 }
1404 wakaba 1.40 },
1405     check_end => sub {
1406     my ($self, $item, $element_state) = @_;
1407     if ($element_state->{phase} eq 'before dd') {
1408     $self->{onerror}->(node => $item->{node},
1409     type => 'child element missing:dd');
1410 wakaba 1.1 }
1411 wakaba 1.40
1412     $HTMLChecker{check_end}->(@_);
1413 wakaba 1.1 },
1414     };
1415    
1416     $Element->{$HTML_NS}->{pre} = {
1417 wakaba 1.40 %HTMLPhrasingContentChecker,
1418 wakaba 1.1 };
1419    
1420     $Element->{$HTML_NS}->{ol} = {
1421 wakaba 1.40 %HTMLChecker,
1422     check_attrs => $GetHTMLAttrsChecker->({
1423 wakaba 1.1 start => $HTMLIntegerAttrChecker,
1424     }),
1425 wakaba 1.40 check_child_element => sub {
1426     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1427     $child_is_transparent, $element_state) = @_;
1428     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1429     $self->{onerror}->(node => $child_el,
1430     type => 'element not allowed:minus',
1431     level => $self->{must_level});
1432     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1433     #
1434     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
1435     #
1436     } else {
1437     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1438 wakaba 1.1 }
1439 wakaba 1.40 },
1440     check_child_text => sub {
1441     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1442     if ($has_significant) {
1443     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1444 wakaba 1.1 }
1445     },
1446     };
1447    
1448     $Element->{$HTML_NS}->{ul} = {
1449 wakaba 1.40 %{$Element->{$HTML_NS}->{ol}},
1450 wakaba 1.1 };
1451    
1452     $Element->{$HTML_NS}->{li} = {
1453 wakaba 1.40 %HTMLProseContentChecker,
1454     check_attrs => $GetHTMLAttrsChecker->({
1455 wakaba 1.1 start => sub {
1456     my ($self, $attr) = @_;
1457     my $parent = $attr->owner_element->manakai_parent_element;
1458     if (defined $parent) {
1459     my $parent_ns = $parent->namespace_uri;
1460     $parent_ns = '' unless defined $parent_ns;
1461     my $parent_ln = $parent->manakai_local_name;
1462     unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') {
1463     $self->{onerror}->(node => $attr, level => 'unsupported',
1464     type => 'attribute');
1465     }
1466     }
1467     $HTMLIntegerAttrChecker->($self, $attr);
1468     },
1469     }),
1470 wakaba 1.40 check_child_element => sub {
1471     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1472     $child_is_transparent, $element_state) = @_;
1473     if ($self->{flag}->{in_menu}) {
1474     $HTMLPhrasingContentChecker{check_child_element}->(@_);
1475     } else {
1476     $HTMLProseContentChecker{check_child_element}->(@_);
1477     }
1478     },
1479     check_child_text => sub {
1480     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1481     if ($self->{flag}->{in_menu}) {
1482     $HTMLPhrasingContentChecker{check_child_text}->(@_);
1483 wakaba 1.1 } else {
1484 wakaba 1.40 $HTMLProseContentChecker{check_child_text}->(@_);
1485 wakaba 1.1 }
1486     },
1487     };
1488    
1489     $Element->{$HTML_NS}->{dl} = {
1490 wakaba 1.40 %HTMLChecker,
1491     check_start => sub {
1492     my ($self, $item, $element_state) = @_;
1493     $element_state->{phase} = 'before dt';
1494     },
1495     check_child_element => sub {
1496     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1497     $child_is_transparent, $element_state) = @_;
1498     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1499     $self->{onerror}->(node => $child_el,
1500     type => 'element not allowed:minus',
1501     level => $self->{must_level});
1502     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1503     #
1504     } elsif ($element_state->{phase} eq 'in dds') {
1505     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1506     #$element_state->{phase} = 'in dds';
1507     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1508     $element_state->{phase} = 'in dts';
1509     } else {
1510     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1511     }
1512     } elsif ($element_state->{phase} eq 'in dts') {
1513     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1514     #$element_state->{phase} = 'in dts';
1515     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1516     $element_state->{phase} = 'in dds';
1517     } else {
1518     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1519     }
1520     } elsif ($element_state->{phase} eq 'before dt') {
1521     if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') {
1522     $element_state->{phase} = 'in dts';
1523     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') {
1524     $self->{onerror}
1525     ->(node => $child_el, type => 'ps element missing:dt');
1526     $element_state->{phase} = 'in dds';
1527     } else {
1528     $self->{onerror}->(node => $child_el, type => 'element not allowed');
1529 wakaba 1.1 }
1530 wakaba 1.40 } else {
1531     die "check_child_element: Bad |dl| phase: $element_state->{phase}";
1532 wakaba 1.1 }
1533 wakaba 1.40 },
1534     check_child_text => sub {
1535     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1536     if ($has_significant) {
1537     $self->{onerror}->(node => $child_node, type => 'character not allowed');
1538     }
1539     },
1540     check_end => sub {
1541     my ($self, $item, $element_state) = @_;
1542     if ($element_state->{phase} eq 'in dts') {
1543     $self->{onerror}->(node => $item->{node},
1544     type => 'child element missing:dd');
1545 wakaba 1.1 }
1546    
1547 wakaba 1.40 $HTMLChecker{check_end}->(@_);
1548 wakaba 1.1 },
1549     };
1550    
1551     $Element->{$HTML_NS}->{dt} = {
1552 wakaba 1.40 %HTMLPhrasingContentChecker,
1553 wakaba 1.1 };
1554    
1555     $Element->{$HTML_NS}->{dd} = {
1556 wakaba 1.40 %HTMLProseContentChecker,
1557 wakaba 1.1 };
1558    
1559     $Element->{$HTML_NS}->{a} = {
1560 wakaba 1.40 %HTMLPhrasingContentChecker,
1561     check_attrs => sub {
1562     my ($self, $item, $element_state) = @_;
1563 wakaba 1.1 my %attr;
1564 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
1565 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
1566     $attr_ns = '' unless defined $attr_ns;
1567     my $attr_ln = $attr->manakai_local_name;
1568     my $checker;
1569     if ($attr_ns eq '') {
1570     $checker = {
1571     target => $HTMLTargetAttrChecker,
1572     href => $HTMLURIAttrChecker,
1573     ping => $HTMLSpaceURIsAttrChecker,
1574 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
1575 wakaba 1.1 media => $HTMLMQAttrChecker,
1576     hreflang => $HTMLLanguageTagAttrChecker,
1577     type => $HTMLIMTAttrChecker,
1578     }->{$attr_ln};
1579     if ($checker) {
1580     $attr{$attr_ln} = $attr;
1581     } else {
1582     $checker = $HTMLAttrChecker->{$attr_ln};
1583     }
1584     }
1585     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
1586     || $AttrChecker->{$attr_ns}->{''};
1587     if ($checker) {
1588     $checker->($self, $attr) if ref $checker;
1589     } else {
1590     $self->{onerror}->(node => $attr, level => 'unsupported',
1591     type => 'attribute');
1592     ## ISSUE: No comformance createria for unknown attributes in the spec
1593     }
1594     }
1595    
1596 wakaba 1.40 $element_state->{in_a_href_original} = $self->{flag}->{in_a_href};
1597 wakaba 1.4 if (defined $attr{href}) {
1598     $self->{has_hyperlink_element} = 1;
1599 wakaba 1.40 $self->{flag}->{in_a_href} = 1;
1600 wakaba 1.4 } else {
1601 wakaba 1.1 for (qw/target ping rel media hreflang type/) {
1602     if (defined $attr{$_}) {
1603     $self->{onerror}->(node => $attr{$_},
1604     type => 'attribute not allowed');
1605     }
1606     }
1607     }
1608     },
1609 wakaba 1.40 check_start => sub {
1610     my ($self, $item, $element_state) = @_;
1611     $self->_add_minus_elements ($element_state, $HTMLInteractiveContent);
1612     },
1613     check_end => sub {
1614     my ($self, $item, $element_state) = @_;
1615     $self->_remove_minus_elements ($element_state);
1616 wakaba 1.1
1617 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1618 wakaba 1.1 },
1619     };
1620    
1621     $Element->{$HTML_NS}->{q} = {
1622 wakaba 1.40 %HTMLPhrasingContentChecker,
1623     check_attrs => $GetHTMLAttrsChecker->({
1624 wakaba 1.1 cite => $HTMLURIAttrChecker,
1625     }),
1626     };
1627    
1628     $Element->{$HTML_NS}->{cite} = {
1629 wakaba 1.40 %HTMLPhrasingContentChecker,
1630 wakaba 1.1 };
1631    
1632     $Element->{$HTML_NS}->{em} = {
1633 wakaba 1.40 %HTMLPhrasingContentChecker,
1634 wakaba 1.1 };
1635    
1636     $Element->{$HTML_NS}->{strong} = {
1637 wakaba 1.40 %HTMLPhrasingContentChecker,
1638 wakaba 1.1 };
1639    
1640     $Element->{$HTML_NS}->{small} = {
1641 wakaba 1.40 %HTMLPhrasingContentChecker,
1642 wakaba 1.1 };
1643    
1644 wakaba 1.38 $Element->{$HTML_NS}->{mark} = {
1645 wakaba 1.40 %HTMLPhrasingContentChecker,
1646 wakaba 1.1 };
1647    
1648     $Element->{$HTML_NS}->{dfn} = {
1649 wakaba 1.40 %HTMLPhrasingContentChecker,
1650     check_start => sub {
1651     my ($self, $item, $element_state) = @_;
1652     $self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}});
1653 wakaba 1.1
1654 wakaba 1.40 my $node = $item->{node};
1655 wakaba 1.1 my $term = $node->get_attribute_ns (undef, 'title');
1656     unless (defined $term) {
1657     for my $child (@{$node->child_nodes}) {
1658     if ($child->node_type == 1) { # ELEMENT_NODE
1659     if (defined $term) {
1660     undef $term;
1661     last;
1662     } elsif ($child->manakai_local_name eq 'abbr') {
1663     my $nsuri = $child->namespace_uri;
1664     if (defined $nsuri and $nsuri eq $HTML_NS) {
1665     my $attr = $child->get_attribute_node_ns (undef, 'title');
1666     if ($attr) {
1667     $term = $attr->value;
1668     }
1669     }
1670     }
1671     } elsif ($child->node_type == 3 or $child->node_type == 4) {
1672     ## TEXT_NODE or CDATA_SECTION_NODE
1673     if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace
1674     next;
1675     }
1676     undef $term;
1677     last;
1678     }
1679     }
1680     unless (defined $term) {
1681     $term = $node->text_content;
1682     }
1683     }
1684     if ($self->{term}->{$term}) {
1685     $self->{onerror}->(node => $node, type => 'duplicate term');
1686     push @{$self->{term}->{$term}}, $node;
1687     } else {
1688     $self->{term}->{$term} = [$node];
1689     }
1690     ## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn|
1691     ## has |title|.
1692 wakaba 1.40 },
1693     check_end => sub {
1694     my ($self, $item, $element_state) = @_;
1695     $self->_remove_minus_elements ($element_state);
1696 wakaba 1.1
1697 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1698 wakaba 1.1 },
1699     };
1700    
1701     $Element->{$HTML_NS}->{abbr} = {
1702 wakaba 1.40 %HTMLPhrasingContentChecker,
1703 wakaba 1.1 };
1704    
1705     $Element->{$HTML_NS}->{time} = {
1706 wakaba 1.40 %HTMLPhrasingContentChecker,
1707     check_attrs => $GetHTMLAttrsChecker->({
1708 wakaba 1.1 datetime => sub { 1 }, # checked in |checker|
1709     }),
1710     ## TODO: Write tests
1711 wakaba 1.40 check_end => sub {
1712     my ($self, $item, $element_state) = @_;
1713 wakaba 1.1
1714 wakaba 1.40 my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime');
1715 wakaba 1.1 my $input;
1716     my $reg_sp;
1717     my $input_node;
1718     if ($attr) {
1719     $input = $attr->value;
1720     $reg_sp = qr/[\x09-\x0D\x20]*/;
1721     $input_node = $attr;
1722     } else {
1723 wakaba 1.40 $input = $item->{node}->text_content;
1724 wakaba 1.1 $reg_sp = qr/\p{Zs}*/;
1725 wakaba 1.40 $input_node = $item->{node};
1726 wakaba 1.1
1727     ## ISSUE: What is the definition for "successfully extracts a date
1728     ## or time"? If the algorithm says the string is invalid but
1729     ## return some date or time, is it "successfully"?
1730     }
1731    
1732     my $hour;
1733     my $minute;
1734     my $second;
1735     if ($input =~ /
1736     \A
1737     [\x09-\x0D\x20]*
1738     ([0-9]+) # 1
1739     (?>
1740     -([0-9]+) # 2
1741     -([0-9]+) # 3
1742     [\x09-\x0D\x20]*
1743     (?>
1744     T
1745     [\x09-\x0D\x20]*
1746     )?
1747     ([0-9]+) # 4
1748     :([0-9]+) # 5
1749     (?>
1750     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6
1751     )?
1752     [\x09-\x0D\x20]*
1753     (?>
1754     Z
1755     [\x09-\x0D\x20]*
1756     |
1757     [+-]([0-9]+):([0-9]+) # 7, 8
1758     [\x09-\x0D\x20]*
1759     )?
1760     \z
1761     |
1762     :([0-9]+) # 9
1763     (?>
1764     :([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10
1765     )?
1766     [\x09-\x0D\x20]*\z
1767     )
1768     /x) {
1769     if (defined $2) { ## YYYY-MM-DD T? hh:mm
1770     if (length $1 != 4 or length $2 != 2 or length $3 != 2 or
1771     length $4 != 2 or length $5 != 2) {
1772     $self->{onerror}->(node => $input_node,
1773     type => 'dateortime:syntax error');
1774     }
1775    
1776     if (1 <= $2 and $2 <= 12) {
1777     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1778     if $3 < 1 or
1779     $3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2];
1780     $self->{onerror}->(node => $input_node, type => 'datetime:bad day')
1781     if $2 == 2 and $3 == 29 and
1782     not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0));
1783     } else {
1784     $self->{onerror}->(node => $input_node,
1785     type => 'datetime:bad month');
1786     }
1787    
1788     ($hour, $minute, $second) = ($4, $5, $6);
1789    
1790     if (defined $7) { ## [+-]hh:mm
1791     if (length $7 != 2 or length $8 != 2) {
1792     $self->{onerror}->(node => $input_node,
1793     type => 'dateortime:syntax error');
1794     }
1795    
1796     $self->{onerror}->(node => $input_node,
1797     type => 'datetime:bad timezone hour')
1798     if $7 > 23;
1799     $self->{onerror}->(node => $input_node,
1800     type => 'datetime:bad timezone minute')
1801     if $8 > 59;
1802     }
1803     } else { ## hh:mm
1804     if (length $1 != 2 or length $9 != 2) {
1805     $self->{onerror}->(node => $input_node,
1806     type => qq'dateortime:syntax error');
1807     }
1808    
1809     ($hour, $minute, $second) = ($1, $9, $10);
1810     }
1811    
1812     $self->{onerror}->(node => $input_node, type => 'datetime:bad hour')
1813     if $hour > 23;
1814     $self->{onerror}->(node => $input_node, type => 'datetime:bad minute')
1815     if $minute > 59;
1816    
1817     if (defined $second) { ## s
1818     ## NOTE: Integer part of second don't have to have length of two.
1819    
1820     if (substr ($second, 0, 1) eq '.') {
1821     $self->{onerror}->(node => $input_node,
1822     type => 'dateortime:syntax error');
1823     }
1824    
1825     $self->{onerror}->(node => $input_node, type => 'datetime:bad second')
1826     if $second >= 60;
1827     }
1828     } else {
1829     $self->{onerror}->(node => $input_node,
1830     type => 'dateortime:syntax error');
1831     }
1832    
1833 wakaba 1.40 $HTMLPhrasingContentChecker{check_end}->(@_);
1834 wakaba 1.1 },
1835     };
1836    
1837     $Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element"
1838 wakaba 1.40 %HTMLPhrasingContentChecker,
1839     check_attrs => $GetHTMLAttrsChecker->({
1840 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1841     min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1842     low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1843     high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1844     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1845     optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }),
1846     }),
1847     };
1848    
1849     $Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content
1850 wakaba 1.40 %HTMLPhrasingContentChecker,
1851     check_attrs => $GetHTMLAttrsChecker->({
1852 wakaba 1.1 value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }),
1853     max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }),
1854     }),
1855     };
1856    
1857     $Element->{$HTML_NS}->{code} = {
1858 wakaba 1.40 %HTMLPhrasingContentChecker,
1859 wakaba 1.1 };
1860    
1861     $Element->{$HTML_NS}->{var} = {
1862 wakaba 1.40 %HTMLPhrasingContentChecker,
1863 wakaba 1.1 };
1864    
1865     $Element->{$HTML_NS}->{samp} = {
1866 wakaba 1.40 %HTMLPhrasingContentChecker,
1867 wakaba 1.1 };
1868    
1869     $Element->{$HTML_NS}->{kbd} = {
1870 wakaba 1.40 %HTMLPhrasingContentChecker,
1871 wakaba 1.1 };
1872    
1873     $Element->{$HTML_NS}->{sub} = {
1874 wakaba 1.40 %HTMLPhrasingContentChecker,
1875 wakaba 1.1 };
1876    
1877     $Element->{$HTML_NS}->{sup} = {
1878 wakaba 1.40 %HTMLPhrasingContentChecker,
1879 wakaba 1.1 };
1880    
1881     $Element->{$HTML_NS}->{span} = {
1882 wakaba 1.40 %HTMLPhrasingContentChecker,
1883 wakaba 1.1 };
1884    
1885     $Element->{$HTML_NS}->{i} = {
1886 wakaba 1.40 %HTMLPhrasingContentChecker,
1887 wakaba 1.1 };
1888    
1889     $Element->{$HTML_NS}->{b} = {
1890 wakaba 1.40 %HTMLPhrasingContentChecker,
1891 wakaba 1.1 };
1892    
1893     $Element->{$HTML_NS}->{bdo} = {
1894 wakaba 1.40 %HTMLPhrasingContentChecker,
1895     check_attrs => sub {
1896     my ($self, $item, $element_state) = @_;
1897     $GetHTMLAttrsChecker->({})->($self, $item, $element_state);
1898     unless ($item->{node}->has_attribute_ns (undef, 'dir')) {
1899     $self->{onerror}->(node => $item->{node},
1900     type => 'attribute missing:dir');
1901 wakaba 1.1 }
1902     },
1903     ## ISSUE: The spec does not directly say that |dir| is a enumerated attr.
1904     };
1905    
1906 wakaba 1.29 =pod
1907    
1908     ## TODO:
1909    
1910     +
1911     + <p>Partly because of the confusion described above, authors are
1912     + strongly recommended to always mark up all paragraphs with the
1913     + <code>p</code> element, and to not have any <code>ins</code> or
1914     + <code>del</code> elements that cross across any <span
1915     + title="paragraph">implied paragraphs</span>.</p>
1916     +
1917     (An informative note)
1918    
1919     <p><code>ins</code> elements should not cross <span
1920     + title="paragraph">implied paragraph</span> boundaries.</p>
1921     (normative)
1922    
1923     + <p><code>del</code> elements should not cross <span
1924     + title="paragraph">implied paragraph</span> boundaries.</p>
1925     (normative)
1926    
1927     =cut
1928    
1929 wakaba 1.1 $Element->{$HTML_NS}->{ins} = {
1930 wakaba 1.40 %HTMLTransparentChecker,
1931     check_attrs => $GetHTMLAttrsChecker->({
1932 wakaba 1.1 cite => $HTMLURIAttrChecker,
1933     datetime => $HTMLDatetimeAttrChecker,
1934     }),
1935     };
1936    
1937     $Element->{$HTML_NS}->{del} = {
1938 wakaba 1.40 %HTMLTransparentChecker,
1939     check_attrs => $GetHTMLAttrsChecker->({
1940 wakaba 1.1 cite => $HTMLURIAttrChecker,
1941     datetime => $HTMLDatetimeAttrChecker,
1942     }),
1943 wakaba 1.40 check_end => sub {
1944     my ($self, $item, $element_state) = @_;
1945     if ($element_state->{has_significant}) {
1946     ## NOTE: Significantness flag does not propagate.
1947     } elsif ($item->{transparent}) {
1948     #
1949     } else {
1950     $self->{onerror}->(node => $item->{node},
1951     level => $self->{should_level},
1952     type => 'no significant content');
1953     }
1954 wakaba 1.1 },
1955     };
1956    
1957 wakaba 1.35 $Element->{$HTML_NS}->{figure} = {
1958 wakaba 1.40 %HTMLProseContentChecker,
1959 wakaba 1.41 ## NOTE: legend, Prose | Prose, legend
1960     check_child_element => sub {
1961     my ($self, $item, $child_el, $child_nsuri, $child_ln,
1962     $child_is_transparent, $element_state) = @_;
1963     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
1964     $self->{onerror}->(node => $child_el,
1965     type => 'element not allowed:minus',
1966     level => $self->{must_level});
1967     $element_state->{has_non_legend} = 1;
1968     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
1969     #
1970     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
1971     if ($element_state->{has_legend_at_first}) {
1972     $self->{onerror}->(node => $child_el,
1973     type => 'element not allowed:figure legend',
1974     level => $self->{must_level});
1975     } elsif ($element_state->{has_legend}) {
1976     $self->{onerror}->(node => $element_state->{has_legend},
1977     type => 'element not allowed:figure legend',
1978     level => $self->{must_level});
1979     $element_state->{has_legend} = $child_el;
1980     } elsif ($element_state->{has_non_legend}) {
1981     $element_state->{has_legend} = $child_el;
1982     } else {
1983     $element_state->{has_legend_at_first} = 1;
1984 wakaba 1.35 }
1985 wakaba 1.41 delete $element_state->{has_non_legend};
1986     } else {
1987     $HTMLProseContentChecker{check_child_element}->(@_);
1988 wakaba 1.43 $element_state->{has_non_legend} = 1 unless $child_is_transparent;
1989 wakaba 1.41 }
1990     },
1991     check_child_text => sub {
1992     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
1993     if ($has_significant) {
1994     $element_state->{has_non_legend} = 1;
1995 wakaba 1.35 }
1996 wakaba 1.41 },
1997     check_end => sub {
1998     my ($self, $item, $element_state) = @_;
1999 wakaba 1.35
2000 wakaba 1.41 if ($element_state->{has_legend_at_first}) {
2001     #
2002     } elsif ($element_state->{has_legend}) {
2003     if ($element_state->{has_non_legend}) {
2004     $self->{onerror}->(node => $element_state->{has_legend},
2005 wakaba 1.35 type => 'element not allowed:figure legend',
2006     level => $self->{must_level});
2007     }
2008     } else {
2009 wakaba 1.41 $self->{onerror}->(node => $item->{node},
2010 wakaba 1.35 type => 'element missing:legend',
2011     level => $self->{must_level});
2012     }
2013 wakaba 1.41
2014     $HTMLProseContentChecker{check_end}->(@_);
2015     ## ISSUE: |<figure><legend>aa</legend></figure>| should be an error?
2016 wakaba 1.35 },
2017     };
2018 wakaba 1.8 ## TODO: Test for <nest/> in <figure/>
2019 wakaba 1.1
2020     $Element->{$HTML_NS}->{img} = {
2021 wakaba 1.40 %HTMLEmptyChecker,
2022     check_attrs => sub {
2023     my ($self, $item, $element_state) = @_;
2024 wakaba 1.1 $GetHTMLAttrsChecker->({
2025     alt => sub { }, ## NOTE: No syntactical requirement
2026     src => $HTMLURIAttrChecker,
2027     usemap => $HTMLUsemapAttrChecker,
2028     ismap => sub {
2029 wakaba 1.40 my ($self, $attr, $parent_item) = @_;
2030     if (not $self->{flag}->{in_a_href}) {
2031 wakaba 1.15 $self->{onerror}->(node => $attr,
2032     type => 'attribute not allowed:ismap');
2033 wakaba 1.1 }
2034 wakaba 1.40 $GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item);
2035 wakaba 1.1 },
2036     ## TODO: height
2037     ## TODO: width
2038 wakaba 1.40 })->($self, $item);
2039     unless ($item->{node}->has_attribute_ns (undef, 'alt')) {
2040     $self->{onerror}->(node => $item->{node},
2041 wakaba 1.37 type => 'attribute missing:alt',
2042     level => $self->{should_level});
2043 wakaba 1.1 }
2044 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'src')) {
2045     $self->{onerror}->(node => $item->{node},
2046     type => 'attribute missing:src');
2047 wakaba 1.1 }
2048     },
2049     };
2050    
2051     $Element->{$HTML_NS}->{iframe} = {
2052 wakaba 1.40 %HTMLTextChecker,
2053     check_attrs => $GetHTMLAttrsChecker->({
2054 wakaba 1.1 src => $HTMLURIAttrChecker,
2055     }),
2056 wakaba 1.40 };
2057    
2058 wakaba 1.1 $Element->{$HTML_NS}->{embed} = {
2059 wakaba 1.40 %HTMLEmptyChecker,
2060     check_attrs => sub {
2061     my ($self, $item, $element_state) = @_;
2062 wakaba 1.1 my $has_src;
2063 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2064 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2065     $attr_ns = '' unless defined $attr_ns;
2066     my $attr_ln = $attr->manakai_local_name;
2067     my $checker;
2068     if ($attr_ns eq '') {
2069     if ($attr_ln eq 'src') {
2070     $checker = $HTMLURIAttrChecker;
2071     $has_src = 1;
2072     } elsif ($attr_ln eq 'type') {
2073     $checker = $HTMLIMTAttrChecker;
2074     } else {
2075     ## TODO: height
2076     ## TODO: width
2077     $checker = $HTMLAttrChecker->{$attr_ln}
2078     || sub { }; ## NOTE: Any local attribute is ok.
2079     }
2080     }
2081     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2082     || $AttrChecker->{$attr_ns}->{''};
2083     if ($checker) {
2084     $checker->($self, $attr);
2085     } else {
2086     $self->{onerror}->(node => $attr, level => 'unsupported',
2087     type => 'attribute');
2088     ## ISSUE: No comformance createria for global attributes in the spec
2089     }
2090     }
2091    
2092     unless ($has_src) {
2093 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2094 wakaba 1.1 type => 'attribute missing:src');
2095     }
2096     },
2097     };
2098    
2099     $Element->{$HTML_NS}->{object} = {
2100 wakaba 1.40 %HTMLTransparentChecker,
2101     check_attrs => sub {
2102     my ($self, $item, $element_state) = @_;
2103 wakaba 1.1 $GetHTMLAttrsChecker->({
2104     data => $HTMLURIAttrChecker,
2105     type => $HTMLIMTAttrChecker,
2106     usemap => $HTMLUsemapAttrChecker,
2107     ## TODO: width
2108     ## TODO: height
2109 wakaba 1.40 })->($self, $item);
2110     unless ($item->{node}->has_attribute_ns (undef, 'data')) {
2111     unless ($item->{node}->has_attribute_ns (undef, 'type')) {
2112     $self->{onerror}->(node => $item->{node},
2113 wakaba 1.1 type => 'attribute missing:data|type');
2114     }
2115     }
2116     },
2117 wakaba 1.41 ## NOTE: param*, transparent (Prose)
2118     check_child_element => sub {
2119     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2120     $child_is_transparent, $element_state) = @_;
2121     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2122     $self->{onerror}->(node => $child_el,
2123     type => 'element not allowed:minus',
2124     level => $self->{must_level});
2125     $element_state->{has_non_legend} = 1;
2126     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2127     #
2128     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') {
2129     if ($element_state->{has_non_param}) {
2130     $self->{onerror}->(node => $child_el,
2131     type => 'element not allowed:prose',
2132     level => $self->{must_level});
2133 wakaba 1.39 }
2134 wakaba 1.41 } else {
2135     $HTMLProseContentChecker{check_child_element}->(@_);
2136     $element_state->{has_non_param} = 1;
2137 wakaba 1.39 }
2138 wakaba 1.25 },
2139 wakaba 1.41 check_child_text => sub {
2140     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2141     if ($has_significant) {
2142     $element_state->{has_non_param} = 1;
2143     }
2144 wakaba 1.42 },
2145     check_end => sub {
2146     my ($self, $item, $element_state) = @_;
2147     if ($element_state->{has_significant}) {
2148 wakaba 1.46 $item->{real_parent_state}->{has_significant} = 1;
2149 wakaba 1.42 } elsif ($item->{node}->manakai_parent_element) {
2150     ## NOTE: Transparent.
2151     } else {
2152     $self->{onerror}->(node => $item->{node},
2153     level => $self->{should_level},
2154     type => 'no significant content');
2155     }
2156     },
2157 wakaba 1.8 ## TODO: Tests for <nest/> in <object/>
2158 wakaba 1.1 };
2159 wakaba 1.41 ## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming?
2160     ## What about |<section><object data><style scoped></style>x</object></section>|?
2161     ## |<section><ins></ins><object data><style scoped></style>x</object></section>|?
2162 wakaba 1.1
2163     $Element->{$HTML_NS}->{param} = {
2164 wakaba 1.40 %HTMLEmptyChecker,
2165     check_attrs => sub {
2166     my ($self, $item, $element_state) = @_;
2167 wakaba 1.1 $GetHTMLAttrsChecker->({
2168     name => sub { },
2169     value => sub { },
2170 wakaba 1.40 })->($self, $item);
2171     unless ($item->{node}->has_attribute_ns (undef, 'name')) {
2172     $self->{onerror}->(node => $item->{node},
2173 wakaba 1.1 type => 'attribute missing:name');
2174     }
2175 wakaba 1.40 unless ($item->{node}->has_attribute_ns (undef, 'value')) {
2176     $self->{onerror}->(node => $item->{node},
2177 wakaba 1.1 type => 'attribute missing:value');
2178     }
2179     },
2180     };
2181    
2182     $Element->{$HTML_NS}->{video} = {
2183 wakaba 1.40 %HTMLTransparentChecker,
2184     check_attrs => $GetHTMLAttrsChecker->({
2185 wakaba 1.1 src => $HTMLURIAttrChecker,
2186     ## TODO: start, loopstart, loopend, end
2187     ## ISSUE: they MUST be "value time offset"s. Value?
2188 wakaba 1.11 ## ISSUE: playcount has no conformance creteria
2189 wakaba 1.1 autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2190     controls => $GetHTMLBooleanAttrChecker->('controls'),
2191 wakaba 1.11 poster => $HTMLURIAttrChecker, ## TODO: not for audio!
2192 wakaba 1.42 ## TODO: width, height
2193 wakaba 1.1 }),
2194 wakaba 1.42 check_start => sub {
2195     my ($self, $item, $element_state) = @_;
2196     $element_state->{allow_source}
2197     = not $item->{node}->has_attribute_ns (undef, 'src');
2198     $element_state->{has_source} ||= $element_state->{allow_source} * -1;
2199     ## NOTE: It might be set true by |check_element|.
2200     },
2201     check_child_element => sub {
2202     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2203     $child_is_transparent, $element_state) = @_;
2204     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2205     $self->{onerror}->(node => $child_el,
2206     type => 'element not allowed:minus',
2207     level => $self->{must_level});
2208     delete $element_state->{allow_source};
2209     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2210     #
2211     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'source') {
2212 wakaba 1.45 unless ($element_state->{allow_source}) {
2213 wakaba 1.42 $self->{onerror}->(node => $child_el,
2214     type => 'element not allowed:prose',
2215     level => $self->{must_level});
2216     }
2217 wakaba 1.45 $element_state->{has_source} = 1;
2218 wakaba 1.1 } else {
2219 wakaba 1.42 delete $element_state->{allow_source};
2220     $HTMLProseContentChecker{check_child_element}->(@_);
2221     }
2222     },
2223     check_child_text => sub {
2224     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2225     if ($has_significant) {
2226     delete $element_state->{allow_source};
2227     }
2228     $HTMLProseContentChecker{check_child_text}->(@_);
2229     },
2230     check_end => sub {
2231     my ($self, $item, $element_state) = @_;
2232     if ($element_state->{has_source} == -1) {
2233     $self->{onerror}->(node => $item->{node},
2234     type => 'element missing:source',
2235     level => $self->{must_level});
2236 wakaba 1.1 }
2237 wakaba 1.42
2238     $Element->{$HTML_NS}->{object}->{check_end}->(@_);
2239 wakaba 1.1 },
2240     };
2241    
2242     $Element->{$HTML_NS}->{audio} = {
2243 wakaba 1.40 %{$Element->{$HTML_NS}->{video}},
2244 wakaba 1.42 check_attrs => $GetHTMLAttrsChecker->({
2245     src => $HTMLURIAttrChecker,
2246     ## TODO: start, loopstart, loopend, end
2247     ## ISSUE: they MUST be "value time offset"s. Value?
2248     ## ISSUE: playcount has no conformance creteria
2249     autoplay => $GetHTMLBooleanAttrChecker->('autoplay'),
2250     controls => $GetHTMLBooleanAttrChecker->('controls'),
2251     }),
2252 wakaba 1.1 };
2253    
2254     $Element->{$HTML_NS}->{source} = {
2255 wakaba 1.40 %HTMLEmptyChecker,
2256     check_attrs => sub {
2257     my ($self, $item, $element_state) = @_;
2258 wakaba 1.1 $GetHTMLAttrsChecker->({
2259     src => $HTMLURIAttrChecker,
2260     type => $HTMLIMTAttrChecker,
2261     media => $HTMLMQAttrChecker,
2262 wakaba 1.40 })->($self, $item, $element_state);
2263     unless ($item->{node}->has_attribute_ns (undef, 'src')) {
2264     $self->{onerror}->(node => $item->{node},
2265 wakaba 1.1 type => 'attribute missing:src');
2266     }
2267     },
2268     };
2269    
2270     $Element->{$HTML_NS}->{canvas} = {
2271 wakaba 1.40 %HTMLTransparentChecker,
2272     check_attrs => $GetHTMLAttrsChecker->({
2273 wakaba 1.1 height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2274     width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }),
2275     }),
2276     };
2277    
2278     $Element->{$HTML_NS}->{map} = {
2279 wakaba 1.40 %HTMLProseContentChecker,
2280     check_attrs => sub {
2281     my ($self, $item, $element_state) = @_;
2282 wakaba 1.4 my $has_id;
2283     $GetHTMLAttrsChecker->({
2284     id => sub {
2285     ## NOTE: same as global |id=""|, with |$self->{map}| registeration
2286     my ($self, $attr) = @_;
2287     my $value = $attr->value;
2288     if (length $value > 0) {
2289     if ($self->{id}->{$value}) {
2290     $self->{onerror}->(node => $attr, type => 'duplicate ID');
2291     push @{$self->{id}->{$value}}, $attr;
2292     } else {
2293     $self->{id}->{$value} = [$attr];
2294     }
2295 wakaba 1.1 } else {
2296 wakaba 1.4 ## NOTE: MUST contain at least one character
2297     $self->{onerror}->(node => $attr, type => 'empty attribute value');
2298 wakaba 1.1 }
2299 wakaba 1.4 if ($value =~ /[\x09-\x0D\x20]/) {
2300     $self->{onerror}->(node => $attr, type => 'space in ID');
2301     }
2302     $self->{map}->{$value} ||= $attr;
2303     $has_id = 1;
2304     },
2305 wakaba 1.40 })->($self, $item, $element_state);
2306     $self->{onerror}->(node => $item->{node}, type => 'attribute missing:id')
2307 wakaba 1.4 unless $has_id;
2308     },
2309 wakaba 1.1 };
2310    
2311     $Element->{$HTML_NS}->{area} = {
2312 wakaba 1.40 %HTMLEmptyChecker,
2313     check_attrs => sub {
2314     my ($self, $item, $element_state) = @_;
2315 wakaba 1.1 my %attr;
2316     my $coords;
2317 wakaba 1.40 for my $attr (@{$item->{node}->attributes}) {
2318 wakaba 1.1 my $attr_ns = $attr->namespace_uri;
2319     $attr_ns = '' unless defined $attr_ns;
2320     my $attr_ln = $attr->manakai_local_name;
2321     my $checker;
2322     if ($attr_ns eq '') {
2323     $checker = {
2324     alt => sub { },
2325     ## NOTE: |alt| value has no conformance creteria.
2326     shape => $GetHTMLEnumeratedAttrChecker->({
2327     circ => -1, circle => 1,
2328     default => 1,
2329     poly => 1, polygon => -1,
2330     rect => 1, rectangle => -1,
2331     }),
2332     coords => sub {
2333     my ($self, $attr) = @_;
2334     my $value = $attr->value;
2335     if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) {
2336     $coords = [split /,/, $value];
2337     } else {
2338     $self->{onerror}->(node => $attr,
2339     type => 'coords:syntax error');
2340     }
2341     },
2342     target => $HTMLTargetAttrChecker,
2343     href => $HTMLURIAttrChecker,
2344     ping => $HTMLSpaceURIsAttrChecker,
2345 wakaba 1.40 rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) },
2346 wakaba 1.1 media => $HTMLMQAttrChecker,
2347     hreflang => $HTMLLanguageTagAttrChecker,
2348     type => $HTMLIMTAttrChecker,
2349     }->{$attr_ln};
2350     if ($checker) {
2351     $attr{$attr_ln} = $attr;
2352     } else {
2353     $checker = $HTMLAttrChecker->{$attr_ln};
2354     }
2355     }
2356     $checker ||= $AttrChecker->{$attr_ns}->{$attr_ln}
2357     || $AttrChecker->{$attr_ns}->{''};
2358     if ($checker) {
2359     $checker->($self, $attr) if ref $checker;
2360     } else {
2361     $self->{onerror}->(node => $attr, level => 'unsupported',
2362     type => 'attribute');
2363     ## ISSUE: No comformance createria for unknown attributes in the spec
2364     }
2365     }
2366    
2367     if (defined $attr{href}) {
2368 wakaba 1.4 $self->{has_hyperlink_element} = 1;
2369 wakaba 1.1 unless (defined $attr{alt}) {
2370 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2371 wakaba 1.1 type => 'attribute missing:alt');
2372     }
2373     } else {
2374     for (qw/target ping rel media hreflang type alt/) {
2375     if (defined $attr{$_}) {
2376     $self->{onerror}->(node => $attr{$_},
2377     type => 'attribute not allowed');
2378     }
2379     }
2380     }
2381    
2382     my $shape = 'rectangle';
2383     if (defined $attr{shape}) {
2384     $shape = {
2385     circ => 'circle', circle => 'circle',
2386     default => 'default',
2387     poly => 'polygon', polygon => 'polygon',
2388     rect => 'rectangle', rectangle => 'rectangle',
2389     }->{lc $attr{shape}->value} || 'rectangle';
2390     ## TODO: ASCII lowercase?
2391     }
2392    
2393     if ($shape eq 'circle') {
2394     if (defined $attr{coords}) {
2395     if (defined $coords) {
2396     if (@$coords == 3) {
2397     if ($coords->[2] < 0) {
2398     $self->{onerror}->(node => $attr{coords},
2399     type => 'coords:out of range:2');
2400     }
2401     } else {
2402     $self->{onerror}->(node => $attr{coords},
2403     type => 'coords:number:3:'.@$coords);
2404     }
2405     } else {
2406     ## NOTE: A syntax error has been reported.
2407     }
2408     } else {
2409 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2410 wakaba 1.1 type => 'attribute missing:coords');
2411     }
2412     } elsif ($shape eq 'default') {
2413     if (defined $attr{coords}) {
2414     $self->{onerror}->(node => $attr{coords},
2415     type => 'attribute not allowed');
2416     }
2417     } elsif ($shape eq 'polygon') {
2418     if (defined $attr{coords}) {
2419     if (defined $coords) {
2420     if (@$coords >= 6) {
2421     unless (@$coords % 2 == 0) {
2422     $self->{onerror}->(node => $attr{coords},
2423     type => 'coords:number:even:'.@$coords);
2424     }
2425     } else {
2426     $self->{onerror}->(node => $attr{coords},
2427     type => 'coords:number:>=6:'.@$coords);
2428     }
2429     } else {
2430     ## NOTE: A syntax error has been reported.
2431     }
2432     } else {
2433 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2434 wakaba 1.1 type => 'attribute missing:coords');
2435     }
2436     } elsif ($shape eq 'rectangle') {
2437     if (defined $attr{coords}) {
2438     if (defined $coords) {
2439     if (@$coords == 4) {
2440     unless ($coords->[0] < $coords->[2]) {
2441     $self->{onerror}->(node => $attr{coords},
2442     type => 'coords:out of range:0');
2443     }
2444     unless ($coords->[1] < $coords->[3]) {
2445     $self->{onerror}->(node => $attr{coords},
2446     type => 'coords:out of range:1');
2447     }
2448     } else {
2449     $self->{onerror}->(node => $attr{coords},
2450     type => 'coords:number:4:'.@$coords);
2451     }
2452     } else {
2453     ## NOTE: A syntax error has been reported.
2454     }
2455     } else {
2456 wakaba 1.40 $self->{onerror}->(node => $item->{node},
2457 wakaba 1.1 type => 'attribute missing:coords');
2458     }
2459     }
2460     },
2461     };
2462     ## TODO: only in map
2463    
2464     $Element->{$HTML_NS}->{table} = {
2465 wakaba 1.40 %HTMLChecker,
2466     check_start => sub {
2467     my ($self, $item, $element_state) = @_;
2468     $element_state->{phase} = 'before caption';
2469     },
2470     check_child_element => sub {
2471     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2472     $child_is_transparent, $element_state) = @_;
2473     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2474     $self->{onerror}->(node => $child_el,
2475     type => 'element not allowed:minus',
2476     level => $self->{must_level});
2477     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2478     #
2479     } elsif ($element_state->{phase} eq 'in tbodys') {
2480     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2481     #$element_state->{phase} = 'in tbodys';
2482     } elsif (not $element_state->{has_tfoot} and
2483     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2484     $element_state->{phase} = 'after tfoot';
2485     $element_state->{has_tfoot} = 1;
2486     } else {
2487     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2488     }
2489     } elsif ($element_state->{phase} eq 'in trs') {
2490     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2491     #$element_state->{phase} = 'in trs';
2492     } elsif (not $element_state->{has_tfoot} and
2493     $child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2494     $element_state->{phase} = 'after tfoot';
2495     $element_state->{has_tfoot} = 1;
2496     } else {
2497     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2498     }
2499     } elsif ($element_state->{phase} eq 'after thead') {
2500     if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2501     $element_state->{phase} = 'in tbodys';
2502     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2503     $element_state->{phase} = 'in trs';
2504     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2505     $element_state->{phase} = 'in tbodys';
2506     $element_state->{has_tfoot} = 1;
2507     } else {
2508     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2509     }
2510     } elsif ($element_state->{phase} eq 'in colgroup') {
2511     if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
2512     $element_state->{phase} = 'in colgroup';
2513     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
2514     $element_state->{phase} = 'after thead';
2515     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2516     $element_state->{phase} = 'in tbodys';
2517     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2518     $element_state->{phase} = 'in trs';
2519     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2520     $element_state->{phase} = 'in tbodys';
2521     $element_state->{has_tfoot} = 1;
2522     } else {
2523     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2524     }
2525     } elsif ($element_state->{phase} eq 'before caption') {
2526     if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') {
2527     $element_state->{phase} = 'in colgroup';
2528     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') {
2529     $element_state->{phase} = 'in colgroup';
2530     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') {
2531     $element_state->{phase} = 'after thead';
2532     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') {
2533     $element_state->{phase} = 'in tbodys';
2534     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2535     $element_state->{phase} = 'in trs';
2536     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') {
2537     $element_state->{phase} = 'in tbodys';
2538     $element_state->{has_tfoot} = 1;
2539     } else {
2540     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2541     }
2542     } elsif ($element_state->{phase} eq 'after tfoot') {
2543     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2544     } else {
2545     die "check_child_element: Bad |table| phase: $element_state->{phase}";
2546     }
2547     },
2548     check_child_text => sub {
2549     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2550     if ($has_significant) {
2551     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2552 wakaba 1.1 }
2553 wakaba 1.40 },
2554     check_end => sub {
2555     my ($self, $item, $element_state) = @_;
2556 wakaba 1.1
2557     ## Table model errors
2558     require Whatpm::HTMLTable;
2559 wakaba 1.40 Whatpm::HTMLTable->form_table ($item->{node}, sub {
2560 wakaba 1.1 my %opt = @_;
2561     $self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node});
2562     });
2563 wakaba 1.40 push @{$self->{return}->{table}}, $item->{node};
2564 wakaba 1.1
2565 wakaba 1.40 $HTMLChecker{check_end}->(@_);
2566 wakaba 1.1 },
2567     };
2568    
2569     $Element->{$HTML_NS}->{caption} = {
2570 wakaba 1.40 %HTMLPhrasingContentChecker,
2571 wakaba 1.1 };
2572    
2573     $Element->{$HTML_NS}->{colgroup} = {
2574 wakaba 1.40 %HTMLEmptyChecker,
2575     check_attrs => $GetHTMLAttrsChecker->({
2576 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2577     ## NOTE: Defined only if "the |colgroup| element contains no |col| elements"
2578     ## TODO: "attribute not supported" if |col|.
2579     ## ISSUE: MUST NOT if any |col|?
2580     ## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)?
2581     }),
2582 wakaba 1.40 check_child_element => sub {
2583     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2584     $child_is_transparent, $element_state) = @_;
2585     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2586     $self->{onerror}->(node => $child_el,
2587     type => 'element not allowed:minus',
2588     level => $self->{must_level});
2589     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2590     #
2591     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') {
2592     #
2593     } else {
2594     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2595     }
2596     },
2597     check_child_text => sub {
2598     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2599     if ($has_significant) {
2600     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2601 wakaba 1.1 }
2602     },
2603     };
2604    
2605     $Element->{$HTML_NS}->{col} = {
2606 wakaba 1.40 %HTMLEmptyChecker,
2607     check_attrs => $GetHTMLAttrsChecker->({
2608 wakaba 1.1 span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2609     }),
2610     };
2611    
2612     $Element->{$HTML_NS}->{tbody} = {
2613 wakaba 1.40 %HTMLChecker,
2614     check_child_element => sub {
2615     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2616     $child_is_transparent, $element_state) = @_;
2617     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2618     $self->{onerror}->(node => $child_el,
2619     type => 'element not allowed:minus',
2620     level => $self->{must_level});
2621     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2622     #
2623     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') {
2624     $element_state->{has_tr} = 1;
2625     } else {
2626     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2627     }
2628     },
2629     check_child_text => sub {
2630     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2631     if ($has_significant) {
2632     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2633 wakaba 1.1 }
2634 wakaba 1.40 },
2635     check_end => sub {
2636     my ($self, $item, $element_state) = @_;
2637     unless ($element_state->{has_tr}) {
2638     $self->{onerror}->(node => $item->{node},
2639     type => 'child element missing:tr');
2640 wakaba 1.1 }
2641 wakaba 1.40
2642     $HTMLChecker{check_end}->(@_);
2643 wakaba 1.1 },
2644     };
2645    
2646     $Element->{$HTML_NS}->{thead} = {
2647 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
2648 wakaba 1.1 };
2649    
2650     $Element->{$HTML_NS}->{tfoot} = {
2651 wakaba 1.40 %{$Element->{$HTML_NS}->{tbody}},
2652 wakaba 1.1 };
2653    
2654     $Element->{$HTML_NS}->{tr} = {
2655 wakaba 1.40 %HTMLChecker,
2656     check_child_element => sub {
2657     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2658     $child_is_transparent, $element_state) = @_;
2659     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2660     $self->{onerror}->(node => $child_el,
2661     type => 'element not allowed:minus',
2662     level => $self->{must_level});
2663     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2664     #
2665     } elsif ($child_nsuri eq $HTML_NS and
2666     ($child_ln eq 'td' or $child_ln eq 'th')) {
2667     $element_state->{has_cell} = 1;
2668     } else {
2669     $self->{onerror}->(node => $child_el, type => 'element not allowed');
2670     }
2671     },
2672     check_child_text => sub {
2673     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2674     if ($has_significant) {
2675     $self->{onerror}->(node => $child_node, type => 'character not allowed');
2676 wakaba 1.1 }
2677 wakaba 1.40 },
2678     check_end => sub {
2679     my ($self, $item, $element_state) = @_;
2680     unless ($element_state->{has_cell}) {
2681     $self->{onerror}->(node => $item->{node},
2682     type => 'child element missing:td|th');
2683 wakaba 1.1 }
2684 wakaba 1.40
2685     $HTMLChecker{check_end}->(@_);
2686 wakaba 1.1 },
2687     };
2688    
2689     $Element->{$HTML_NS}->{td} = {
2690 wakaba 1.40 %HTMLProseContentChecker,
2691     check_attrs => $GetHTMLAttrsChecker->({
2692 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2693     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2694     }),
2695     };
2696    
2697     $Element->{$HTML_NS}->{th} = {
2698 wakaba 1.40 %HTMLPhrasingContentChecker,
2699     check_attrs => $GetHTMLAttrsChecker->({
2700 wakaba 1.1 colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2701     rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }),
2702     scope => $GetHTMLEnumeratedAttrChecker
2703     ->({row => 1, col => 1, rowgroup => 1, colgroup => 1}),
2704     }),
2705     };
2706    
2707     ## TODO: forms
2708 wakaba 1.8 ## TODO: Tests for <nest/> in form elements
2709 wakaba 1.1
2710     $Element->{$HTML_NS}->{script} = {
2711 wakaba 1.40 %HTMLChecker,
2712     check_attrs => $GetHTMLAttrsChecker->({
2713 wakaba 1.1 src => $HTMLURIAttrChecker,
2714     defer => $GetHTMLBooleanAttrChecker->('defer'),
2715     async => $GetHTMLBooleanAttrChecker->('async'),
2716     type => $HTMLIMTAttrChecker,
2717 wakaba 1.9 }),
2718 wakaba 1.40 check_start => sub {
2719     my ($self, $item, $element_state) = @_;
2720 wakaba 1.1
2721 wakaba 1.40 if ($item->{node}->has_attribute_ns (undef, 'src')) {
2722     $element_state->{must_be_empty} = 1;
2723 wakaba 1.1 } else {
2724     ## NOTE: No content model conformance in HTML5 spec.
2725 wakaba 1.40 my $type = $item->{node}->get_attribute_ns (undef, 'type');
2726     my $language = $item->{node}->get_attribute_ns (undef, 'language');
2727 wakaba 1.1 if ((defined $type and $type eq '') or
2728     (defined $language and $language eq '')) {
2729     $type = 'text/javascript';
2730     } elsif (defined $type) {
2731     #
2732     } elsif (defined $language) {
2733     $type = 'text/' . $language;
2734     } else {
2735     $type = 'text/javascript';
2736     }
2737 wakaba 1.40 $element_state->{script_type} = $type; ## TODO: $type normalization
2738     }
2739     },
2740     check_child_element => sub {
2741     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2742     $child_is_transparent, $element_state) = @_;
2743     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2744     $self->{onerror}->(node => $child_el,
2745     type => 'element not allowed:minus',
2746     level => $self->{must_level});
2747     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2748     #
2749     } else {
2750     if ($element_state->{must_be_empty}) {
2751     $self->{onerror}->(node => $child_el,
2752     type => 'element not allowed');
2753     }
2754     }
2755     },
2756     check_child_text => sub {
2757     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2758     if ($has_significant and
2759     $element_state->{must_be_empty}) {
2760     $self->{onerror}->(node => $child_node,
2761     type => 'character not allowed');
2762     }
2763     },
2764     check_end => sub {
2765     my ($self, $item, $element_state) = @_;
2766     unless ($element_state->{must_be_empty}) {
2767     $self->{onerror}->(node => $item->{node}, level => 'unsupported',
2768     type => 'script:'.$element_state->{script_type});
2769     ## TODO: text/javascript support
2770    
2771     $HTMLChecker{check_end}->(@_);
2772 wakaba 1.1 }
2773     },
2774     };
2775 wakaba 1.25 ## ISSUE: Significant check and text child node
2776 wakaba 1.1
2777     ## NOTE: When script is disabled.
2778     $Element->{$HTML_NS}->{noscript} = {
2779 wakaba 1.40 %HTMLTransparentChecker,
2780     check_start => sub {
2781     my ($self, $item, $element_state) = @_;
2782 wakaba 1.3
2783 wakaba 1.40 unless ($item->{node}->owner_document->manakai_is_html) {
2784     $self->{onerror}->(node => $item->{node}, type => 'in XML:noscript');
2785 wakaba 1.3 }
2786    
2787 wakaba 1.40 unless ($self->{flag}->{in_head}) {
2788     $self->_add_minus_elements ($element_state,
2789     {$HTML_NS => {noscript => 1}});
2790     }
2791 wakaba 1.3 },
2792 wakaba 1.40 check_child_element => sub {
2793     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2794     $child_is_transparent, $element_state) = @_;
2795     if ($self->{flag}->{in_head}) {
2796     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2797     $self->{onerror}->(node => $child_el,
2798     type => 'element not allowed:minus',
2799     level => $self->{must_level});
2800     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2801     #
2802     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') {
2803     #
2804     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') {
2805     if ($child_el->has_attribute_ns (undef, 'scoped')) {
2806     $self->{onerror}->(node => $child_el,
2807     type => 'element not allowed:head noscript',
2808     level => $self->{must_level});
2809     }
2810     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') {
2811 wakaba 1.47 my $http_equiv_attr
2812     = $child_el->get_attribute_node_ns (undef, 'http-equiv');
2813     if ($http_equiv_attr) {
2814     ## TODO: case
2815     if (lc $http_equiv_attr->value eq 'content-type') {
2816 wakaba 1.40 $self->{onerror}->(node => $child_el,
2817 wakaba 1.34 type => 'element not allowed:head noscript',
2818     level => $self->{must_level});
2819 wakaba 1.47 } else {
2820     #
2821 wakaba 1.3 }
2822 wakaba 1.47 } else {
2823     $self->{onerror}->(node => $child_el,
2824     type => 'element not allowed:head noscript',
2825     level => $self->{must_level});
2826 wakaba 1.3 }
2827 wakaba 1.40 } else {
2828     $self->{onerror}->(node => $child_el,
2829     type => 'element not allowed:head noscript',
2830     level => $self->{must_level});
2831     }
2832     } else {
2833     $HTMLTransparentChecker{check_child_element}->(@_);
2834     }
2835     },
2836     check_child_text => sub {
2837     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2838     if ($self->{flag}->{in_head}) {
2839     if ($has_significant) {
2840     $self->{onerror}->(node => $child_node,
2841     type => 'character not allowed');
2842 wakaba 1.3 }
2843     } else {
2844 wakaba 1.40 $HTMLTransparentChecker{check_child_text}->(@_);
2845     }
2846     },
2847     check_end => sub {
2848     my ($self, $item, $element_state) = @_;
2849     $self->_remove_minus_elements ($element_state);
2850     if ($self->{flag}->{in_head}) {
2851     $HTMLChecker{check_end}->(@_);
2852     } else {
2853     $HTMLPhrasingContentChecker{check_end}->(@_);
2854 wakaba 1.3 }
2855 wakaba 1.1 },
2856     };
2857 wakaba 1.3 ## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head>
2858 wakaba 1.1
2859     $Element->{$HTML_NS}->{'event-source'} = {
2860 wakaba 1.40 %HTMLEmptyChecker,
2861     check_attrs => $GetHTMLAttrsChecker->({
2862 wakaba 1.1 src => $HTMLURIAttrChecker,
2863     }),
2864     };
2865    
2866     $Element->{$HTML_NS}->{details} = {
2867 wakaba 1.40 %HTMLProseContentChecker,
2868     check_attrs => $GetHTMLAttrsChecker->({
2869 wakaba 1.1 open => $GetHTMLBooleanAttrChecker->('open'),
2870     }),
2871 wakaba 1.43 ## NOTE: legend, Prose
2872     check_child_element => sub {
2873     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2874     $child_is_transparent, $element_state) = @_;
2875     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2876     $self->{onerror}->(node => $child_el,
2877     type => 'element not allowed:minus',
2878     level => $self->{must_level});
2879     $element_state->{has_non_legend} = 1;
2880     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2881     #
2882     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') {
2883     if ($element_state->{has_non_legend}) {
2884     $self->{onerror}->(node => $child_el,
2885     type => 'element not allowed:details legend',
2886     level => $self->{must_level});
2887     }
2888     $element_state->{has_legend} = 1;
2889     $element_state->{has_non_legend} = 1;
2890     } else {
2891     $HTMLProseContentChecker{check_child_element}->(@_);
2892     $element_state->{has_non_legend} = 1 unless $child_is_transparent;
2893     ## ISSUE: |<details><object><legend>xx</legend></object>..</details>|
2894     ## is conforming?
2895     }
2896     },
2897     check_child_text => sub {
2898     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2899     if ($has_significant) {
2900     $element_state->{has_non_legend} = 1;
2901     }
2902     },
2903     check_end => sub {
2904     my ($self, $item, $element_state) = @_;
2905 wakaba 1.1
2906 wakaba 1.43 unless ($element_state->{has_legend}) {
2907     $self->{onerror}->(node => $item->{node},
2908     type => 'element missing:legend',
2909     level => $self->{must_level});
2910     }
2911    
2912     $HTMLProseContentChecker{check_end}->(@_);
2913     ## ISSUE: |<details><legend>aa</legend></details>| error?
2914 wakaba 1.1 },
2915     };
2916    
2917     $Element->{$HTML_NS}->{datagrid} = {
2918 wakaba 1.40 %HTMLProseContentChecker,
2919     check_attrs => $GetHTMLAttrsChecker->({
2920 wakaba 1.1 disabled => $GetHTMLBooleanAttrChecker->('disabled'),
2921     multiple => $GetHTMLBooleanAttrChecker->('multiple'),
2922     }),
2923 wakaba 1.40 check_start => sub {
2924     my ($self, $item, $element_state) = @_;
2925 wakaba 1.1
2926 wakaba 1.40 $self->_add_minus_elements ($element_state,
2927     {$HTML_NS => {a => 1, datagrid => 1}});
2928     $element_state->{phase} = 'any';
2929     },
2930     ## Prose -(text* table Prose*) | table | select | datalist | Empty
2931     check_child_element => sub {
2932     my ($self, $item, $child_el, $child_nsuri, $child_ln,
2933     $child_is_transparent, $element_state) = @_;
2934     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
2935     $self->{onerror}->(node => $child_el,
2936     type => 'element not allowed:minus',
2937     level => $self->{must_level});
2938     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
2939     #
2940     } elsif ($element_state->{phase} eq 'prose') {
2941     if ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
2942 wakaba 1.44 if (not $element_state->{has_element} and
2943 wakaba 1.40 $child_nsuri eq $HTML_NS and
2944     $child_ln eq 'table') {
2945     $self->{onerror}->(node => $child_el,
2946     type => 'element not allowed');
2947     } else {
2948 wakaba 1.8 #
2949 wakaba 1.1 }
2950 wakaba 1.40 } else {
2951     $self->{onerror}->(node => $child_el,
2952     type => 'element not allowed');
2953     }
2954 wakaba 1.43 $element_state->{has_element} = 1;
2955 wakaba 1.40 } elsif ($element_state->{phase} eq 'any') {
2956     if ($child_nsuri eq $HTML_NS and
2957     {table => 1, select => 1, datalist => 1}->{$child_ln}) {
2958     $element_state->{phase} = 'none';
2959     } elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) {
2960     $element_state->{has_element} = 1;
2961     $element_state->{phase} = 'prose';
2962 wakaba 1.43 ## TODO: transparent?
2963 wakaba 1.40 } else {
2964     $self->{onerror}->(node => $child_el,
2965     type => 'element not allowed');
2966     }
2967     } elsif ($element_state->{phase} eq 'none') {
2968     $self->{onerror}->(node => $child_el,
2969     type => 'element not allowed');
2970     } else {
2971     die "check_child_element: Bad |datagrid| phase: $element_state->{phase}";
2972     }
2973     },
2974     check_child_text => sub {
2975     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
2976     if ($has_significant) {
2977     if ($element_state->{phase} eq 'prose') {
2978     #
2979     } elsif ($element_state->{phase} eq 'any') {
2980     $element_state->{phase} = 'prose';
2981     } else {
2982     $self->{onerror}->(node => $child_node,
2983     type => 'character not allowed');
2984 wakaba 1.1 }
2985     }
2986 wakaba 1.40 },
2987     check_end => sub {
2988     my ($self, $item, $element_state) = @_;
2989     $self->_remove_minus_elements ($element_state);
2990 wakaba 1.1
2991 wakaba 1.40 if ($element_state->{phase} eq 'none') {
2992     $HTMLChecker{check_end}->(@_);
2993     } else {
2994     $HTMLPhrasingContentChecker{check_end}->(@_);
2995     }
2996     },
2997 wakaba 1.29 ## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa"
2998     ## are not disallowed (assuming that form control contents are also
2999     ## prose content).
3000 wakaba 1.1 };
3001    
3002     $Element->{$HTML_NS}->{command} = {
3003 wakaba 1.40 %HTMLEmptyChecker,
3004     check_attrs => $GetHTMLAttrsChecker->({
3005 wakaba 1.1 checked => $GetHTMLBooleanAttrChecker->('checked'),
3006     default => $GetHTMLBooleanAttrChecker->('default'),
3007     disabled => $GetHTMLBooleanAttrChecker->('disabled'),
3008     hidden => $GetHTMLBooleanAttrChecker->('hidden'),
3009     icon => $HTMLURIAttrChecker,
3010     label => sub { }, ## NOTE: No conformance creteria
3011     radiogroup => sub { }, ## NOTE: No conformance creteria
3012     ## NOTE: |title| has special semantics, but no syntactical difference
3013     type => sub {
3014     my ($self, $attr) = @_;
3015     my $value = $attr->value;
3016     unless ({command => 1, checkbox => 1, radio => 1}->{$value}) {
3017     $self->{onerror}->(node => $attr, type => 'attribute value not allowed');
3018     }
3019     },
3020     }),
3021     };
3022    
3023     $Element->{$HTML_NS}->{menu} = {
3024 wakaba 1.40 %HTMLPhrasingContentChecker,
3025     check_attrs => $GetHTMLAttrsChecker->({
3026 wakaba 1.1 autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'),
3027     id => sub {
3028     ## NOTE: same as global |id=""|, with |$self->{menu}| registeration
3029     my ($self, $attr) = @_;
3030     my $value = $attr->value;
3031     if (length $value > 0) {
3032     if ($self->{id}->{$value}) {
3033     $self->{onerror}->(node => $attr, type => 'duplicate ID');
3034     push @{$self->{id}->{$value}}, $attr;
3035     } else {
3036     $self->{id}->{$value} = [$attr];
3037     }
3038     } else {
3039     ## NOTE: MUST contain at least one character
3040     $self->{onerror}->(node => $attr, type => 'empty attribute value');
3041     }
3042     if ($value =~ /[\x09-\x0D\x20]/) {
3043     $self->{onerror}->(node => $attr, type => 'space in ID');
3044     }
3045     $self->{menu}->{$value} ||= $attr;
3046     ## ISSUE: <menu id=""><p contextmenu=""> match?
3047     },
3048     label => sub { }, ## NOTE: No conformance creteria
3049     type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}),
3050     }),
3051 wakaba 1.40 check_start => sub {
3052     my ($self, $item, $element_state) = @_;
3053     $element_state->{phase} = 'li or phrasing';
3054     $element_state->{in_menu_original} = $self->{flag}->{in_menu};
3055     $self->{flag}->{in_menu} = 1;
3056     },
3057     check_child_element => sub {
3058     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3059     $child_is_transparent, $element_state) = @_;
3060     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3061     $self->{onerror}->(node => $child_el,
3062     type => 'element not allowed:minus',
3063     level => $self->{must_level});
3064     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3065     #
3066     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') {
3067     if ($element_state->{phase} eq 'li') {
3068     #
3069     } elsif ($element_state->{phase} eq 'li or phrasing') {
3070     $element_state->{phase} = 'li';
3071     } else {
3072     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3073     }
3074     } elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) {
3075     if ($element_state->{phase} eq 'phrasing') {
3076     #
3077     } elsif ($element_state->{phase} eq 'li or phrasing') {
3078     $element_state->{phase} = 'phrasing';
3079     } else {
3080     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3081     }
3082     } else {
3083     $self->{onerror}->(node => $child_el, type => 'element not allowed');
3084     }
3085     },
3086     check_child_text => sub {
3087     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3088     if ($has_significant) {
3089     if ($element_state->{phase} eq 'phrasing') {
3090     #
3091     } elsif ($element_state->{phase} eq 'li or phrasing') {
3092     $element_state->{phase} = 'phrasing';
3093     } else {
3094     $self->{onerror}->(node => $child_node,
3095     type => 'character not allowed');
3096 wakaba 1.1 }
3097     }
3098 wakaba 1.40 },
3099     check_end => sub {
3100     my ($self, $item, $element_state) = @_;
3101     delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original};
3102    
3103     if ($element_state->{phase} eq 'li') {
3104     $HTMLChecker{check_end}->(@_);
3105     } else { # 'phrasing' or 'li or phrasing'
3106     $HTMLPhrasingContentChecker{check_end}->(@_);
3107 wakaba 1.1 }
3108     },
3109 wakaba 1.8 };
3110    
3111     $Element->{$HTML_NS}->{datatemplate} = {
3112 wakaba 1.40 %HTMLChecker,
3113     check_child_element => sub {
3114     my ($self, $item, $child_el, $child_nsuri, $child_ln,
3115     $child_is_transparent, $element_state) = @_;
3116     if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) {
3117     $self->{onerror}->(node => $child_el,
3118     type => 'element not allowed:minus',
3119     level => $self->{must_level});
3120     } elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) {
3121     #
3122     } elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'rule') {
3123     #
3124     } else {
3125     $self->{onerror}->(node => $child_el,
3126     type => 'element not allowed:datatemplate');
3127     }
3128     },
3129     check_child_text => sub {
3130     my ($self, $item, $child_node, $has_significant, $element_state) = @_;
3131     if ($has_significant) {
3132     $self->{onerror}->(node => $child_node, type => 'character not allowed');
3133 wakaba 1.8 }
3134     },
3135     is_xml_root => 1,
3136     };
3137    
3138     $Element->{$HTML_NS}->{rule} = {
3139 wakaba 1.40 %HTMLChecker,
3140     check_attrs => $GetHTMLAttrsChecker->({
3141 wakaba 1.23 condition => $HTMLSelectorsAttrChecker,
3142 wakaba 1.18 mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker,
3143 wakaba 1.8 }),
3144 wakaba 1.40 check_start => sub {
3145     my ($self, $item, $element_state) = @_;
3146     $self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}});
3147     },
3148     check_child_element => sub { },
3149     check_child_text => sub { },
3150     check_end => sub {
3151     my ($self, $item, $element_state) = @_;
3152     $self->_remove_plus_elements ($element_state);
3153     $HTMLChecker{check_end}->(@_);
3154 wakaba 1.8 },
3155     ## NOTE: "MAY be anything that, when the parent |datatemplate|
3156     ## is applied to some conforming data, results in a conforming DOM tree.":
3157     ## We don't check against this.
3158     };
3159    
3160     $Element->{$HTML_NS}->{nest} = {
3161 wakaba 1.40 %HTMLEmptyChecker,
3162     check_attrs => $GetHTMLAttrsChecker->({
3163 wakaba 1.23 filter => $HTMLSelectorsAttrChecker,
3164     mode => sub {
3165     my ($self, $attr) = @_;
3166     my $value = $attr->value;
3167     if ($value !~ /\A[^\x09-\x0D\x20]+\z/) {
3168     $self->{onerror}->(node => $attr, type => 'mode:syntax error');
3169     }
3170     },
3171 wakaba 1.8 }),
3172 wakaba 1.1 };
3173    
3174     $Element->{$HTML_NS}->{legend} = {
3175 wakaba 1.40 %HTMLPhrasingContentChecker,
3176 wakaba 1.1 };
3177    
3178     $Element->{$HTML_NS}->{div} = {
3179 wakaba 1.40 %HTMLProseContentChecker,
3180 wakaba 1.1 };
3181    
3182     $Element->{$HTML_NS}->{font} = {
3183 wakaba 1.40 %HTMLTransparentChecker,
3184     check_attrs => $GetHTMLAttrsChecker->({}), ## TODO
3185 wakaba 1.1 };
3186    
3187     $Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1;
3188    
3189     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24