1 |
wakaba |
1.1 |
package Whatpm::ContentChecker; |
2 |
|
|
use strict; |
3 |
|
|
require Whatpm::ContentChecker; |
4 |
|
|
|
5 |
|
|
my $HTML_NS = q<http://www.w3.org/1999/xhtml>; |
6 |
|
|
|
7 |
wakaba |
1.29 |
## December 2007 HTML5 Classification |
8 |
|
|
|
9 |
|
|
my $HTMLMetadataContent = { |
10 |
|
|
$HTML_NS => { |
11 |
|
|
title => 1, base => 1, link => 1, style => 1, script => 1, noscript => 1, |
12 |
|
|
'event-source' => 1, command => 1, datatemplate => 1, |
13 |
|
|
## NOTE: A |meta| with no |name| element is not allowed as |
14 |
|
|
## a metadata content other than |head| element. |
15 |
|
|
meta => 1, |
16 |
|
|
}, |
17 |
|
|
## NOTE: RDF is mentioned in the HTML5 spec. |
18 |
|
|
## TODO: Other RDF elements? |
19 |
|
|
q<http://www.w3.org/1999/02/22-rdf-syntax-ns#> => {RDF => 1}, |
20 |
|
|
}; |
21 |
|
|
|
22 |
|
|
my $HTMLProseContent = { |
23 |
|
|
$HTML_NS => { |
24 |
|
|
section => 1, nav => 1, article => 1, blockquote => 1, aside => 1, |
25 |
|
|
h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1, |
26 |
|
|
footer => 1, address => 1, p => 1, hr => 1, dialog => 1, pre => 1, |
27 |
|
|
ol => 1, ul => 1, dl => 1, figure => 1, map => 1, table => 1, |
28 |
|
|
details => 1, ## ISSUE: "Prose element" in spec. |
29 |
|
|
datagrid => 1, ## ISSUE: "Prose element" in spec. |
30 |
|
|
datatemplate => 1, |
31 |
|
|
div => 1, ## ISSUE: No category in spec. |
32 |
|
|
## NOTE: |style| is only allowed if |scoped| attribute is specified. |
33 |
|
|
## Additionally, it must be before any other element or |
34 |
|
|
## non-inter-element-whitespace text node. |
35 |
|
|
style => 1, |
36 |
|
|
|
37 |
wakaba |
1.38 |
br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1, |
38 |
wakaba |
1.29 |
dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1, |
39 |
|
|
var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1, |
40 |
|
|
b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1, |
41 |
|
|
command => 1, font => 1, |
42 |
|
|
a => 1, |
43 |
|
|
datagrid => 1, ## ISSUE: "Interactive element" in the spec. |
44 |
|
|
## NOTE: |area| is allowed only as a descendant of |map|. |
45 |
|
|
area => 1, |
46 |
|
|
|
47 |
|
|
ins => 1, del => 1, |
48 |
|
|
|
49 |
|
|
## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose. |
50 |
|
|
menu => 1, |
51 |
|
|
|
52 |
|
|
img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1, |
53 |
|
|
canvas => 1, |
54 |
|
|
}, |
55 |
|
|
|
56 |
|
|
## NOTE: Embedded |
57 |
|
|
q<http://www.w3.org/1998/Math/MathML> => {math => 1}, |
58 |
|
|
q<http://www.w3.org/2000/svg> => {svg => 1}, |
59 |
|
|
}; |
60 |
|
|
|
61 |
|
|
my $HTMLSectioningContent = { |
62 |
|
|
$HTML_NS => { |
63 |
|
|
section => 1, nav => 1, article => 1, blockquote => 1, aside => 1, |
64 |
|
|
## NOTE: |body| is only allowed in |html| element. |
65 |
|
|
body => 1, |
66 |
|
|
}, |
67 |
|
|
}; |
68 |
|
|
|
69 |
|
|
my $HTMLHeadingContent = { |
70 |
|
|
$HTML_NS => { |
71 |
|
|
h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, header => 1, |
72 |
|
|
}, |
73 |
|
|
}; |
74 |
|
|
|
75 |
|
|
my $HTMLPhrasingContent = { |
76 |
|
|
## NOTE: All phrasing content is also prose content. |
77 |
|
|
$HTML_NS => { |
78 |
wakaba |
1.38 |
br => 1, q => 1, cite => 1, em => 1, strong => 1, small => 1, mark => 1, |
79 |
wakaba |
1.29 |
dfn => 1, abbr => 1, time => 1, progress => 1, meter => 1, code => 1, |
80 |
|
|
var => 1, samp => 1, kbd => 1, sub => 1, sup => 1, span => 1, i => 1, |
81 |
|
|
b => 1, bdo => 1, script => 1, noscript => 1, 'event-source' => 1, |
82 |
|
|
command => 1, font => 1, |
83 |
|
|
a => 1, |
84 |
|
|
datagrid => 1, ## ISSUE: "Interactive element" in the spec. |
85 |
|
|
## NOTE: |area| is allowed only as a descendant of |map|. |
86 |
|
|
area => 1, |
87 |
|
|
|
88 |
|
|
## NOTE: Transparent. |
89 |
|
|
ins => 1, del => 1, |
90 |
|
|
|
91 |
|
|
## NOTE: If there is a |menu| ancestor, phrasing. Otherwise, prose. |
92 |
|
|
menu => 1, |
93 |
|
|
|
94 |
|
|
img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1, |
95 |
|
|
canvas => 1, |
96 |
|
|
}, |
97 |
|
|
|
98 |
|
|
## NOTE: Embedded |
99 |
|
|
q<http://www.w3.org/1998/Math/MathML> => {math => 1}, |
100 |
|
|
q<http://www.w3.org/2000/svg> => {svg => 1}, |
101 |
|
|
|
102 |
|
|
## NOTE: And non-inter-element-whitespace text nodes. |
103 |
|
|
}; |
104 |
|
|
|
105 |
wakaba |
1.40 |
## $HTMLEmbeddedContent: See Whatpm::ContentChecker. |
106 |
wakaba |
1.29 |
|
107 |
|
|
my $HTMLInteractiveContent = { |
108 |
|
|
$HTML_NS => { |
109 |
|
|
a => 1, |
110 |
wakaba |
1.36 |
datagrid => 1, ## ISSUE: Categorized as "Inetractive element" |
111 |
wakaba |
1.29 |
}, |
112 |
|
|
}; |
113 |
|
|
|
114 |
wakaba |
1.36 |
## NOTE: $HTMLTransparentElements: See Whatpm::ContentChecker. |
115 |
|
|
## NOTE: Semi-transparent elements: See Whatpm::ContentChecker. |
116 |
|
|
|
117 |
|
|
## -- Common attribute syntacx checkers |
118 |
|
|
|
119 |
wakaba |
1.1 |
our $AttrChecker; |
120 |
|
|
|
121 |
|
|
my $GetHTMLEnumeratedAttrChecker = sub { |
122 |
|
|
my $states = shift; # {value => conforming ? 1 : -1} |
123 |
|
|
return sub { |
124 |
|
|
my ($self, $attr) = @_; |
125 |
|
|
my $value = lc $attr->value; ## TODO: ASCII case insensitibility? |
126 |
|
|
if ($states->{$value} > 0) { |
127 |
|
|
# |
128 |
|
|
} elsif ($states->{$value}) { |
129 |
|
|
$self->{onerror}->(node => $attr, type => 'enumerated:non-conforming'); |
130 |
|
|
} else { |
131 |
|
|
$self->{onerror}->(node => $attr, type => 'enumerated:invalid'); |
132 |
|
|
} |
133 |
|
|
}; |
134 |
|
|
}; # $GetHTMLEnumeratedAttrChecker |
135 |
|
|
|
136 |
|
|
my $GetHTMLBooleanAttrChecker = sub { |
137 |
|
|
my $local_name = shift; |
138 |
|
|
return sub { |
139 |
|
|
my ($self, $attr) = @_; |
140 |
|
|
my $value = $attr->value; |
141 |
|
|
unless ($value eq $local_name or $value eq '') { |
142 |
|
|
$self->{onerror}->(node => $attr, type => 'boolean:invalid'); |
143 |
|
|
} |
144 |
|
|
}; |
145 |
|
|
}; # $GetHTMLBooleanAttrChecker |
146 |
|
|
|
147 |
wakaba |
1.8 |
## Unordered set of space-separated tokens |
148 |
wakaba |
1.18 |
my $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker = sub { |
149 |
wakaba |
1.8 |
my ($self, $attr) = @_; |
150 |
|
|
my %word; |
151 |
|
|
for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) { |
152 |
|
|
unless ($word{$word}) { |
153 |
|
|
$word{$word} = 1; |
154 |
|
|
} else { |
155 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate token:'.$word); |
156 |
|
|
} |
157 |
|
|
} |
158 |
wakaba |
1.18 |
}; # $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker |
159 |
wakaba |
1.8 |
|
160 |
wakaba |
1.1 |
## |rel| attribute (unordered set of space separated tokens, |
161 |
|
|
## whose allowed values are defined by the section on link types) |
162 |
|
|
my $HTMLLinkTypesAttrChecker = sub { |
163 |
wakaba |
1.4 |
my ($a_or_area, $todo, $self, $attr) = @_; |
164 |
wakaba |
1.1 |
my %word; |
165 |
|
|
for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) { |
166 |
|
|
unless ($word{$word}) { |
167 |
|
|
$word{$word} = 1; |
168 |
wakaba |
1.18 |
} elsif ($word eq 'up') { |
169 |
|
|
# |
170 |
wakaba |
1.1 |
} else { |
171 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate token:'.$word); |
172 |
|
|
} |
173 |
|
|
} |
174 |
|
|
## NOTE: Case sensitive match (since HTML5 spec does not say link |
175 |
|
|
## types are case-insensitive and it says "The value should not |
176 |
|
|
## be confusingly similar to any other defined value (e.g. |
177 |
|
|
## differing only in case)."). |
178 |
|
|
## NOTE: Though there is no explicit "MUST NOT" for undefined values, |
179 |
|
|
## "MAY"s and "only ... MAY" restrict non-standard non-registered |
180 |
|
|
## values to be used conformingly. |
181 |
|
|
require Whatpm::_LinkTypeList; |
182 |
|
|
our $LinkType; |
183 |
|
|
for my $word (keys %word) { |
184 |
|
|
my $def = $LinkType->{$word}; |
185 |
|
|
if (defined $def) { |
186 |
|
|
if ($def->{status} eq 'accepted') { |
187 |
|
|
if (defined $def->{effect}->[$a_or_area]) { |
188 |
|
|
# |
189 |
|
|
} else { |
190 |
|
|
$self->{onerror}->(node => $attr, |
191 |
|
|
type => 'link type:bad context:'.$word); |
192 |
|
|
} |
193 |
|
|
} elsif ($def->{status} eq 'proposal') { |
194 |
|
|
$self->{onerror}->(node => $attr, level => 's', |
195 |
|
|
type => 'link type:proposed:'.$word); |
196 |
wakaba |
1.20 |
if (defined $def->{effect}->[$a_or_area]) { |
197 |
|
|
# |
198 |
|
|
} else { |
199 |
|
|
$self->{onerror}->(node => $attr, |
200 |
|
|
type => 'link type:bad context:'.$word); |
201 |
|
|
} |
202 |
wakaba |
1.1 |
} else { # rejected or synonym |
203 |
|
|
$self->{onerror}->(node => $attr, |
204 |
|
|
type => 'link type:non-conforming:'.$word); |
205 |
|
|
} |
206 |
wakaba |
1.4 |
if (defined $def->{effect}->[$a_or_area]) { |
207 |
|
|
if ($word eq 'alternate') { |
208 |
|
|
# |
209 |
|
|
} elsif ($def->{effect}->[$a_or_area] eq 'hyperlink') { |
210 |
|
|
$todo->{has_hyperlink_link_type} = 1; |
211 |
|
|
} |
212 |
|
|
} |
213 |
wakaba |
1.1 |
if ($def->{unique}) { |
214 |
|
|
unless ($self->{has_link_type}->{$word}) { |
215 |
|
|
$self->{has_link_type}->{$word} = 1; |
216 |
|
|
} else { |
217 |
|
|
$self->{onerror}->(node => $attr, |
218 |
|
|
type => 'link type:duplicate:'.$word); |
219 |
|
|
} |
220 |
|
|
} |
221 |
|
|
} else { |
222 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
223 |
|
|
type => 'link type:'.$word); |
224 |
|
|
} |
225 |
|
|
} |
226 |
wakaba |
1.4 |
$todo->{has_hyperlink_link_type} = 1 |
227 |
|
|
if $word{alternate} and not $word{stylesheet}; |
228 |
wakaba |
1.1 |
## TODO: The Pingback 1.0 specification, which is referenced by HTML5, |
229 |
|
|
## says that using both X-Pingback: header field and HTML |
230 |
|
|
## <link rel=pingback> is deprecated and if both appears they |
231 |
|
|
## SHOULD contain exactly the same value. |
232 |
|
|
## ISSUE: Pingback 1.0 specification defines the exact representation |
233 |
|
|
## of its link element, which cannot be tested by the current arch. |
234 |
|
|
## ISSUE: Pingback 1.0 specification says that the document MUST NOT |
235 |
|
|
## include any string that matches to the pattern for the rel=pingback link, |
236 |
|
|
## which again inpossible to test. |
237 |
|
|
## ISSUE: rel=pingback href MUST NOT include entities other than predefined 4. |
238 |
wakaba |
1.12 |
|
239 |
|
|
## NOTE: <link rel="up index"><link rel="up up index"> is not an error. |
240 |
wakaba |
1.17 |
## NOTE: We can't check "If the page is part of multiple hierarchies, |
241 |
|
|
## then they SHOULD be described in different paragraphs.". |
242 |
wakaba |
1.1 |
}; # $HTMLLinkTypesAttrChecker |
243 |
wakaba |
1.20 |
|
244 |
|
|
## TODO: "When an author uses a new type not defined by either this specification or the Wiki page, conformance checkers should offer to add the value to the Wiki, with the details described above, with the "proposal" status." |
245 |
wakaba |
1.1 |
|
246 |
|
|
## URI (or IRI) |
247 |
|
|
my $HTMLURIAttrChecker = sub { |
248 |
|
|
my ($self, $attr) = @_; |
249 |
|
|
## ISSUE: Relative references are allowed? (RFC 3987 "IRI" is an absolute reference with optional fragment identifier.) |
250 |
|
|
my $value = $attr->value; |
251 |
|
|
Whatpm::URIChecker->check_iri_reference ($value, sub { |
252 |
|
|
my %opt = @_; |
253 |
|
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
254 |
|
|
type => 'URI::'.$opt{type}. |
255 |
|
|
(defined $opt{position} ? ':'.$opt{position} : '')); |
256 |
|
|
}); |
257 |
wakaba |
1.17 |
$self->{has_uri_attr} = 1; ## TODO: <html manifest> |
258 |
wakaba |
1.1 |
}; # $HTMLURIAttrChecker |
259 |
|
|
|
260 |
|
|
## A space separated list of one or more URIs (or IRIs) |
261 |
|
|
my $HTMLSpaceURIsAttrChecker = sub { |
262 |
|
|
my ($self, $attr) = @_; |
263 |
|
|
my $i = 0; |
264 |
|
|
for my $value (split /[\x09-\x0D\x20]+/, $attr->value) { |
265 |
|
|
Whatpm::URIChecker->check_iri_reference ($value, sub { |
266 |
|
|
my %opt = @_; |
267 |
|
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
268 |
wakaba |
1.2 |
type => 'URIs:'.':'. |
269 |
|
|
$opt{type}.':'.$i. |
270 |
wakaba |
1.1 |
(defined $opt{position} ? ':'.$opt{position} : '')); |
271 |
|
|
}); |
272 |
|
|
$i++; |
273 |
|
|
} |
274 |
|
|
## ISSUE: Relative references? |
275 |
|
|
## ISSUE: Leading or trailing white spaces are conformant? |
276 |
|
|
## ISSUE: A sequence of white space characters are conformant? |
277 |
|
|
## ISSUE: A zero-length string is conformant? (It does contain a relative reference, i.e. same as base URI.) |
278 |
|
|
## NOTE: Duplication seems not an error. |
279 |
wakaba |
1.4 |
$self->{has_uri_attr} = 1; |
280 |
wakaba |
1.1 |
}; # $HTMLSpaceURIsAttrChecker |
281 |
|
|
|
282 |
|
|
my $HTMLDatetimeAttrChecker = sub { |
283 |
|
|
my ($self, $attr) = @_; |
284 |
|
|
my $value = $attr->value; |
285 |
|
|
## ISSUE: "space", not "space character" (in parsing algorihtm, "space character") |
286 |
|
|
if ($value =~ /\A([0-9]{4})-([0-9]{2})-([0-9]{2})(?>[\x09-\x0D\x20]+(?>T[\x09-\x0D\x20]*)?|T[\x09-\x0D\x20]*)([0-9]{2}):([0-9]{2})(?>:([0-9]{2}))?(?>\.([0-9]+))?[\x09-\x0D\x20]*(?>Z|[+-]([0-9]{2}):([0-9]{2}))\z/) { |
287 |
|
|
my ($y, $M, $d, $h, $m, $s, $f, $zh, $zm) |
288 |
|
|
= ($1, $2, $3, $4, $5, $6, $7, $8, $9); |
289 |
|
|
if (0 < $M and $M < 13) { ## ISSUE: This is not explicitly specified (though in parsing algorithm) |
290 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad day') |
291 |
|
|
if $d < 1 or |
292 |
|
|
$d > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$M]; |
293 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad day') |
294 |
|
|
if $M == 2 and $d == 29 and |
295 |
|
|
not ($y % 400 == 0 or ($y % 4 == 0 and $y % 100 != 0)); |
296 |
|
|
} else { |
297 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad month'); |
298 |
|
|
} |
299 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad hour') if $h > 23; |
300 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad minute') if $m > 59; |
301 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad second') |
302 |
|
|
if defined $s and $s > 59; |
303 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad timezone hour') |
304 |
|
|
if $zh > 23; |
305 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:bad timezone minute') |
306 |
|
|
if $zm > 59; |
307 |
|
|
## ISSUE: Maybe timezone -00:00 should have same semantics as in RFC 3339. |
308 |
|
|
} else { |
309 |
|
|
$self->{onerror}->(node => $attr, type => 'datetime:syntax error'); |
310 |
|
|
} |
311 |
|
|
}; # $HTMLDatetimeAttrChecker |
312 |
|
|
|
313 |
|
|
my $HTMLIntegerAttrChecker = sub { |
314 |
|
|
my ($self, $attr) = @_; |
315 |
|
|
my $value = $attr->value; |
316 |
|
|
unless ($value =~ /\A-?[0-9]+\z/) { |
317 |
|
|
$self->{onerror}->(node => $attr, type => 'integer:syntax error'); |
318 |
|
|
} |
319 |
|
|
}; # $HTMLIntegerAttrChecker |
320 |
|
|
|
321 |
|
|
my $GetHTMLNonNegativeIntegerAttrChecker = sub { |
322 |
|
|
my $range_check = shift; |
323 |
|
|
return sub { |
324 |
|
|
my ($self, $attr) = @_; |
325 |
|
|
my $value = $attr->value; |
326 |
|
|
if ($value =~ /\A[0-9]+\z/) { |
327 |
|
|
unless ($range_check->($value + 0)) { |
328 |
|
|
$self->{onerror}->(node => $attr, type => 'nninteger:out of range'); |
329 |
|
|
} |
330 |
|
|
} else { |
331 |
|
|
$self->{onerror}->(node => $attr, |
332 |
|
|
type => 'nninteger:syntax error'); |
333 |
|
|
} |
334 |
|
|
}; |
335 |
|
|
}; # $GetHTMLNonNegativeIntegerAttrChecker |
336 |
|
|
|
337 |
|
|
my $GetHTMLFloatingPointNumberAttrChecker = sub { |
338 |
|
|
my $range_check = shift; |
339 |
|
|
return sub { |
340 |
|
|
my ($self, $attr) = @_; |
341 |
|
|
my $value = $attr->value; |
342 |
|
|
if ($value =~ /\A-?[0-9.]+\z/ and $value =~ /[0-9]/) { |
343 |
|
|
unless ($range_check->($value + 0)) { |
344 |
|
|
$self->{onerror}->(node => $attr, type => 'float:out of range'); |
345 |
|
|
} |
346 |
|
|
} else { |
347 |
|
|
$self->{onerror}->(node => $attr, |
348 |
|
|
type => 'float:syntax error'); |
349 |
|
|
} |
350 |
|
|
}; |
351 |
|
|
}; # $GetHTMLFloatingPointNumberAttrChecker |
352 |
|
|
|
353 |
|
|
## "A valid MIME type, optionally with parameters. [RFC 2046]" |
354 |
|
|
## ISSUE: RFC 2046 does not define syntax of media types. |
355 |
|
|
## ISSUE: The definition of "a valid MIME type" is unknown. |
356 |
|
|
## Syntactical correctness? |
357 |
|
|
my $HTMLIMTAttrChecker = sub { |
358 |
|
|
my ($self, $attr) = @_; |
359 |
|
|
my $value = $attr->value; |
360 |
|
|
## ISSUE: RFC 2045 Content-Type header field allows insertion |
361 |
|
|
## of LWS/comments between tokens. Is it allowed in HTML? Maybe no. |
362 |
|
|
## ISSUE: RFC 2231 extension? Maybe no. |
363 |
|
|
my $lws0 = qr/(?>(?>\x0D\x0A)?[\x09\x20])*/; |
364 |
|
|
my $token = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/; |
365 |
|
|
my $qs = qr/"(?>[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\x7E]|\x0D\x0A[\x09\x20]|\x5C[\x00-\x7F])*"/; |
366 |
|
|
if ($value =~ m#\A$lws0($token)$lws0/$lws0($token)$lws0((?>;$lws0$token$lws0=$lws0(?>$token|$qs)$lws0)*)\z#) { |
367 |
|
|
my @type = ($1, $2); |
368 |
|
|
my $param = $3; |
369 |
|
|
while ($param =~ s/^;$lws0($token)$lws0=$lws0(?>($token)|($qs))$lws0//) { |
370 |
|
|
if (defined $2) { |
371 |
|
|
push @type, $1 => $2; |
372 |
|
|
} else { |
373 |
|
|
my $n = $1; |
374 |
|
|
my $v = $2; |
375 |
|
|
$v =~ s/\\(.)/$1/gs; |
376 |
|
|
push @type, $n => $v; |
377 |
|
|
} |
378 |
|
|
} |
379 |
|
|
require Whatpm::IMTChecker; |
380 |
|
|
Whatpm::IMTChecker->check_imt (sub { |
381 |
|
|
my %opt = @_; |
382 |
|
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
383 |
|
|
type => 'IMT:'.$opt{type}); |
384 |
|
|
}, @type); |
385 |
|
|
} else { |
386 |
|
|
$self->{onerror}->(node => $attr, type => 'IMT:syntax error'); |
387 |
|
|
} |
388 |
|
|
}; # $HTMLIMTAttrChecker |
389 |
|
|
|
390 |
|
|
my $HTMLLanguageTagAttrChecker = sub { |
391 |
wakaba |
1.7 |
## NOTE: See also $AtomLanguageTagAttrChecker in Atom.pm. |
392 |
|
|
|
393 |
wakaba |
1.1 |
my ($self, $attr) = @_; |
394 |
wakaba |
1.6 |
my $value = $attr->value; |
395 |
|
|
require Whatpm::LangTag; |
396 |
|
|
Whatpm::LangTag->check_rfc3066_language_tag ($value, sub { |
397 |
|
|
my %opt = @_; |
398 |
|
|
my $type = 'LangTag:'.$opt{type}; |
399 |
|
|
$type .= ':' . $opt{subtag} if defined $opt{subtag}; |
400 |
|
|
$self->{onerror}->(node => $attr, type => $type, value => $opt{value}, |
401 |
|
|
level => $opt{level}); |
402 |
|
|
}); |
403 |
wakaba |
1.1 |
## ISSUE: RFC 4646 (3066bis)? |
404 |
wakaba |
1.6 |
|
405 |
|
|
## TODO: testdata |
406 |
wakaba |
1.1 |
}; # $HTMLLanguageTagAttrChecker |
407 |
|
|
|
408 |
|
|
## "A valid media query [MQ]" |
409 |
|
|
my $HTMLMQAttrChecker = sub { |
410 |
|
|
my ($self, $attr) = @_; |
411 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
412 |
|
|
type => 'media query'); |
413 |
|
|
## ISSUE: What is "a valid media query"? |
414 |
|
|
}; # $HTMLMQAttrChecker |
415 |
|
|
|
416 |
|
|
my $HTMLEventHandlerAttrChecker = sub { |
417 |
|
|
my ($self, $attr) = @_; |
418 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
419 |
|
|
type => 'event handler'); |
420 |
|
|
## TODO: MUST contain valid ECMAScript code matching the |
421 |
|
|
## ECMAScript |FunctionBody| production. [ECMA262] |
422 |
|
|
## ISSUE: MUST be ES3? E4X? ES4? JS1.x? |
423 |
|
|
## ISSUE: Automatic semicolon insertion does not apply? |
424 |
|
|
## ISSUE: Other script languages? |
425 |
|
|
}; # $HTMLEventHandlerAttrChecker |
426 |
|
|
|
427 |
|
|
my $HTMLUsemapAttrChecker = sub { |
428 |
|
|
my ($self, $attr) = @_; |
429 |
|
|
## MUST be a valid hashed ID reference to a |map| element |
430 |
|
|
my $value = $attr->value; |
431 |
|
|
if ($value =~ s/^#//) { |
432 |
|
|
## ISSUE: Is |usemap="#"| conformant? (c.f. |id=""| is non-conformant.) |
433 |
|
|
push @{$self->{usemap}}, [$value => $attr]; |
434 |
|
|
} else { |
435 |
|
|
$self->{onerror}->(node => $attr, type => '#idref:syntax error'); |
436 |
|
|
} |
437 |
|
|
## NOTE: Space characters in hashed ID references are conforming. |
438 |
|
|
## ISSUE: UA algorithm for matching is case-insensitive; IDs only different in cases should be reported |
439 |
|
|
}; # $HTMLUsemapAttrChecker |
440 |
|
|
|
441 |
|
|
my $HTMLTargetAttrChecker = sub { |
442 |
|
|
my ($self, $attr) = @_; |
443 |
|
|
my $value = $attr->value; |
444 |
|
|
if ($value =~ /^_/) { |
445 |
|
|
$value = lc $value; ## ISSUE: ASCII case-insentitive? |
446 |
|
|
unless ({ |
447 |
|
|
_self => 1, _parent => 1, _top => 1, |
448 |
|
|
}->{$value}) { |
449 |
|
|
$self->{onerror}->(node => $attr, |
450 |
|
|
type => 'reserved browsing context name'); |
451 |
|
|
} |
452 |
|
|
} else { |
453 |
wakaba |
1.29 |
## NOTE: An empty string is a valid browsing context name (same as _self). |
454 |
wakaba |
1.1 |
} |
455 |
|
|
}; # $HTMLTargetAttrChecker |
456 |
|
|
|
457 |
wakaba |
1.23 |
my $HTMLSelectorsAttrChecker = sub { |
458 |
|
|
my ($self, $attr) = @_; |
459 |
|
|
|
460 |
|
|
## ISSUE: Namespace resolution? |
461 |
|
|
|
462 |
|
|
my $value = $attr->value; |
463 |
|
|
|
464 |
|
|
require Whatpm::CSS::SelectorsParser; |
465 |
|
|
my $p = Whatpm::CSS::SelectorsParser->new; |
466 |
|
|
$p->{pseudo_class}->{$_} = 1 for qw/ |
467 |
|
|
active checked disabled empty enabled first-child first-of-type |
468 |
|
|
focus hover indeterminate last-child last-of-type link only-child |
469 |
|
|
only-of-type root target visited |
470 |
|
|
lang nth-child nth-last-child nth-of-type nth-last-of-type not |
471 |
|
|
-manakai-contains -manakai-current |
472 |
|
|
/; |
473 |
|
|
|
474 |
|
|
$p->{pseudo_element}->{$_} = 1 for qw/ |
475 |
|
|
after before first-letter first-line |
476 |
|
|
/; |
477 |
|
|
|
478 |
|
|
$p->{must_level} = $self->{must_level}; |
479 |
|
|
$p->{onerror} = sub { |
480 |
|
|
my %opt = @_; |
481 |
|
|
$opt{type} = 'selectors:'.$opt{type}; |
482 |
|
|
$self->{onerror}->(%opt, node => $attr); |
483 |
|
|
}; |
484 |
|
|
$p->parse_string ($value); |
485 |
|
|
}; # $HTMLSelectorsAttrChecker |
486 |
|
|
|
487 |
wakaba |
1.1 |
my $HTMLAttrChecker = { |
488 |
|
|
id => sub { |
489 |
|
|
## NOTE: |map| has its own variant of |id=""| checker |
490 |
|
|
my ($self, $attr) = @_; |
491 |
|
|
my $value = $attr->value; |
492 |
|
|
if (length $value > 0) { |
493 |
|
|
if ($self->{id}->{$value}) { |
494 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate ID'); |
495 |
|
|
push @{$self->{id}->{$value}}, $attr; |
496 |
|
|
} else { |
497 |
|
|
$self->{id}->{$value} = [$attr]; |
498 |
|
|
} |
499 |
|
|
if ($value =~ /[\x09-\x0D\x20]/) { |
500 |
|
|
$self->{onerror}->(node => $attr, type => 'space in ID'); |
501 |
|
|
} |
502 |
|
|
} else { |
503 |
|
|
## NOTE: MUST contain at least one character |
504 |
|
|
$self->{onerror}->(node => $attr, type => 'empty attribute value'); |
505 |
|
|
} |
506 |
|
|
}, |
507 |
|
|
title => sub {}, ## NOTE: No conformance creteria |
508 |
|
|
lang => sub { |
509 |
|
|
my ($self, $attr) = @_; |
510 |
wakaba |
1.6 |
my $value = $attr->value; |
511 |
|
|
if ($value eq '') { |
512 |
|
|
# |
513 |
|
|
} else { |
514 |
|
|
require Whatpm::LangTag; |
515 |
|
|
Whatpm::LangTag->check_rfc3066_language_tag ($value, sub { |
516 |
|
|
my %opt = @_; |
517 |
|
|
my $type = 'LangTag:'.$opt{type}; |
518 |
|
|
$type .= ':' . $opt{subtag} if defined $opt{subtag}; |
519 |
|
|
$self->{onerror}->(node => $attr, type => $type, value => $opt{value}, |
520 |
|
|
level => $opt{level}); |
521 |
|
|
}); |
522 |
|
|
} |
523 |
wakaba |
1.1 |
## ISSUE: RFC 4646 (3066bis)? |
524 |
|
|
unless ($attr->owner_document->manakai_is_html) { |
525 |
|
|
$self->{onerror}->(node => $attr, type => 'in XML:lang'); |
526 |
|
|
} |
527 |
wakaba |
1.6 |
|
528 |
|
|
## TODO: test data |
529 |
wakaba |
1.1 |
}, |
530 |
|
|
dir => $GetHTMLEnumeratedAttrChecker->({ltr => 1, rtl => 1}), |
531 |
|
|
class => sub { |
532 |
|
|
my ($self, $attr) = @_; |
533 |
|
|
my %word; |
534 |
|
|
for my $word (grep {length $_} split /[\x09-\x0D\x20]/, $attr->value) { |
535 |
|
|
unless ($word{$word}) { |
536 |
|
|
$word{$word} = 1; |
537 |
|
|
push @{$self->{return}->{class}->{$word}||=[]}, $attr; |
538 |
|
|
} else { |
539 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate token:'.$word); |
540 |
|
|
} |
541 |
|
|
} |
542 |
|
|
}, |
543 |
|
|
contextmenu => sub { |
544 |
|
|
my ($self, $attr) = @_; |
545 |
|
|
my $value = $attr->value; |
546 |
|
|
push @{$self->{contextmenu}}, [$value => $attr]; |
547 |
|
|
## ISSUE: "The value must be the ID of a menu element in the DOM." |
548 |
|
|
## What is "in the DOM"? A menu Element node that is not part |
549 |
|
|
## of the Document tree is in the DOM? A menu Element node that |
550 |
|
|
## belong to another Document tree is in the DOM? |
551 |
|
|
}, |
552 |
|
|
irrelevant => $GetHTMLBooleanAttrChecker->('irrelevant'), |
553 |
wakaba |
1.8 |
tabindex => $HTMLIntegerAttrChecker |
554 |
|
|
## TODO: ref, template, registrationmark |
555 |
wakaba |
1.1 |
}; |
556 |
|
|
|
557 |
|
|
for (qw/ |
558 |
|
|
onabort onbeforeunload onblur onchange onclick oncontextmenu |
559 |
|
|
ondblclick ondrag ondragend ondragenter ondragleave ondragover |
560 |
|
|
ondragstart ondrop onerror onfocus onkeydown onkeypress |
561 |
|
|
onkeyup onload onmessage onmousedown onmousemove onmouseout |
562 |
|
|
onmouseover onmouseup onmousewheel onresize onscroll onselect |
563 |
|
|
onsubmit onunload |
564 |
|
|
/) { |
565 |
|
|
$HTMLAttrChecker->{$_} = $HTMLEventHandlerAttrChecker; |
566 |
|
|
} |
567 |
|
|
|
568 |
|
|
my $GetHTMLAttrsChecker = sub { |
569 |
|
|
my $element_specific_checker = shift; |
570 |
|
|
return sub { |
571 |
wakaba |
1.40 |
my ($self, $item, $element_state) = @_; |
572 |
|
|
for my $attr (@{$item->{node}->attributes}) { |
573 |
wakaba |
1.1 |
my $attr_ns = $attr->namespace_uri; |
574 |
|
|
$attr_ns = '' unless defined $attr_ns; |
575 |
|
|
my $attr_ln = $attr->manakai_local_name; |
576 |
|
|
my $checker; |
577 |
|
|
if ($attr_ns eq '') { |
578 |
|
|
$checker = $element_specific_checker->{$attr_ln} |
579 |
wakaba |
1.40 |
|| $HTMLAttrChecker->{$attr_ln}; |
580 |
wakaba |
1.1 |
} |
581 |
|
|
$checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} |
582 |
wakaba |
1.40 |
|| $AttrChecker->{$attr_ns}->{''}; |
583 |
wakaba |
1.1 |
if ($checker) { |
584 |
wakaba |
1.40 |
$checker->($self, $attr, $item); |
585 |
wakaba |
1.1 |
} else { |
586 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
587 |
|
|
type => 'attribute'); |
588 |
|
|
## ISSUE: No comformance createria for unknown attributes in the spec |
589 |
|
|
} |
590 |
|
|
} |
591 |
|
|
}; |
592 |
|
|
}; # $GetHTMLAttrsChecker |
593 |
|
|
|
594 |
wakaba |
1.40 |
my %HTMLChecker = ( |
595 |
|
|
%Whatpm::ContentChecker::AnyChecker, |
596 |
|
|
check_attrs => $GetHTMLAttrsChecker->({}), |
597 |
|
|
); |
598 |
|
|
|
599 |
|
|
my %HTMLEmptyChecker = ( |
600 |
|
|
%HTMLChecker, |
601 |
|
|
check_child_element => sub { |
602 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
603 |
|
|
$child_is_transparent, $element_state) = @_; |
604 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
605 |
|
|
$self->{onerror}->(node => $child_el, |
606 |
|
|
type => 'element not allowed:minus', |
607 |
|
|
level => $self->{must_level}); |
608 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
609 |
|
|
# |
610 |
|
|
} else { |
611 |
|
|
$self->{onerror}->(node => $child_el, |
612 |
|
|
type => 'element not allowed:empty', |
613 |
|
|
level => $self->{must_level}); |
614 |
|
|
} |
615 |
|
|
}, |
616 |
|
|
check_child_text => sub { |
617 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
618 |
|
|
if ($has_significant) { |
619 |
|
|
$self->{onerror}->(node => $child_node, |
620 |
|
|
type => 'character not allowed:empty', |
621 |
|
|
level => $self->{must_level}); |
622 |
|
|
} |
623 |
|
|
}, |
624 |
|
|
); |
625 |
|
|
|
626 |
|
|
my %HTMLTextChecker = ( |
627 |
|
|
%HTMLChecker, |
628 |
|
|
check_child_element => sub { |
629 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
630 |
|
|
$child_is_transparent, $element_state) = @_; |
631 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
632 |
|
|
$self->{onerror}->(node => $child_el, |
633 |
|
|
type => 'element not allowed:minus', |
634 |
|
|
level => $self->{must_level}); |
635 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
636 |
|
|
# |
637 |
|
|
} else { |
638 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
639 |
|
|
} |
640 |
|
|
}, |
641 |
|
|
); |
642 |
|
|
|
643 |
|
|
my %HTMLProseContentChecker = ( |
644 |
|
|
%HTMLChecker, |
645 |
|
|
check_child_element => sub { |
646 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
647 |
|
|
$child_is_transparent, $element_state) = @_; |
648 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
649 |
|
|
$self->{onerror}->(node => $child_el, |
650 |
|
|
type => 'element not allowed:minus', |
651 |
|
|
level => $self->{must_level}); |
652 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
653 |
|
|
# |
654 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') { |
655 |
|
|
if ($element_state->{has_non_style} or |
656 |
|
|
not $child_el->has_attribute_ns (undef, 'scoped')) { |
657 |
|
|
$self->{onerror}->(node => $child_el, |
658 |
|
|
type => 'element not allowed:prose style', |
659 |
|
|
level => $self->{must_level}); |
660 |
|
|
} |
661 |
|
|
} elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) { |
662 |
|
|
$element_state->{has_non_style} = 1; |
663 |
|
|
} else { |
664 |
|
|
$element_state->{has_non_style} = 1; |
665 |
|
|
$self->{onerror}->(node => $child_el, |
666 |
|
|
type => 'element not allowed:prose', |
667 |
|
|
level => $self->{must_level}) |
668 |
|
|
} |
669 |
|
|
}, |
670 |
|
|
check_child_text => sub { |
671 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
672 |
|
|
if ($has_significant) { |
673 |
|
|
$element_state->{has_non_style} = 1; |
674 |
|
|
} |
675 |
|
|
}, |
676 |
|
|
check_end => sub { |
677 |
|
|
my ($self, $item, $element_state) = @_; |
678 |
|
|
if ($element_state->{has_significant}) { |
679 |
|
|
$item->{parent_state}->{has_significant} = 1; |
680 |
|
|
} elsif ($item->{transparent}) { |
681 |
|
|
# |
682 |
|
|
} else { |
683 |
|
|
$self->{onerror}->(node => $item->{node}, |
684 |
|
|
level => $self->{should_level}, |
685 |
|
|
type => 'no significant content'); |
686 |
|
|
} |
687 |
|
|
}, |
688 |
|
|
); |
689 |
|
|
|
690 |
|
|
my %HTMLPhrasingContentChecker = ( |
691 |
|
|
%HTMLChecker, |
692 |
|
|
check_child_element => sub { |
693 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
694 |
|
|
$child_is_transparent, $element_state) = @_; |
695 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
696 |
|
|
$self->{onerror}->(node => $child_el, |
697 |
|
|
type => 'element not allowed:minus', |
698 |
|
|
level => $self->{must_level}); |
699 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
700 |
|
|
# |
701 |
|
|
} elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) { |
702 |
|
|
# |
703 |
|
|
} else { |
704 |
|
|
$self->{onerror}->(node => $child_el, |
705 |
|
|
type => 'element not allowed:phrasing', |
706 |
|
|
level => $self->{must_level}); |
707 |
|
|
} |
708 |
|
|
}, |
709 |
|
|
check_end => $HTMLProseContentChecker{check_end}, |
710 |
|
|
## NOTE: The definition for |li| assumes that the only differences |
711 |
|
|
## between prose and phrasing content checkers are |check_child_element| |
712 |
|
|
## and |check_child_text|. |
713 |
|
|
); |
714 |
|
|
|
715 |
|
|
my %HTMLTransparentChecker = %HTMLProseContentChecker; |
716 |
|
|
## ISSUE: Significant content rule should be applied to transparent element |
717 |
|
|
## with parent? Currently, applied to |video| but not to others. |
718 |
|
|
|
719 |
wakaba |
1.1 |
our $Element; |
720 |
|
|
our $ElementDefault; |
721 |
|
|
|
722 |
|
|
$Element->{$HTML_NS}->{''} = { |
723 |
wakaba |
1.40 |
%HTMLChecker, |
724 |
|
|
check_start => $ElementDefault->{check_start}, |
725 |
wakaba |
1.1 |
}; |
726 |
|
|
|
727 |
|
|
$Element->{$HTML_NS}->{html} = { |
728 |
|
|
is_root => 1, |
729 |
wakaba |
1.40 |
check_attrs => $GetHTMLAttrsChecker->({ |
730 |
wakaba |
1.16 |
manifest => $HTMLURIAttrChecker, |
731 |
wakaba |
1.1 |
xmlns => sub { |
732 |
|
|
my ($self, $attr) = @_; |
733 |
|
|
my $value = $attr->value; |
734 |
|
|
unless ($value eq $HTML_NS) { |
735 |
|
|
$self->{onerror}->(node => $attr, type => 'invalid attribute value'); |
736 |
|
|
} |
737 |
|
|
unless ($attr->owner_document->manakai_is_html) { |
738 |
|
|
$self->{onerror}->(node => $attr, type => 'in XML:xmlns'); |
739 |
|
|
## TODO: Test |
740 |
|
|
} |
741 |
|
|
}, |
742 |
|
|
}), |
743 |
wakaba |
1.40 |
check_start => sub { |
744 |
|
|
my ($self, $item, $element_state) = @_; |
745 |
|
|
$element_state->{phase} = 'before head'; |
746 |
|
|
}, |
747 |
|
|
check_child_element => sub { |
748 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
749 |
|
|
$child_is_transparent, $element_state) = @_; |
750 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
751 |
|
|
$self->{onerror}->(node => $child_el, |
752 |
|
|
type => 'element not allowed:minus', |
753 |
|
|
level => $self->{must_level}); |
754 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
755 |
|
|
# |
756 |
|
|
} elsif ($element_state->{phase} eq 'before head') { |
757 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'head') { |
758 |
|
|
$element_state->{phase} = 'after head'; |
759 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'body') { |
760 |
|
|
$self->{onerror}->(node => $child_el, |
761 |
|
|
type => 'ps element missing:head'); |
762 |
|
|
$element_state->{phase} = 'after body'; |
763 |
|
|
} else { |
764 |
|
|
$self->{onerror}->(node => $child_el, |
765 |
|
|
type => 'element not allowed'); |
766 |
|
|
} |
767 |
|
|
} elsif ($element_state->{phase} eq 'after head') { |
768 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'body') { |
769 |
|
|
$element_state->{phase} = 'after body'; |
770 |
|
|
} else { |
771 |
|
|
$self->{onerror}->(node => $child_el, |
772 |
|
|
type => 'element not allowed'); |
773 |
|
|
} |
774 |
|
|
} elsif ($element_state->{phase} eq 'after body') { |
775 |
|
|
$self->{onerror}->(node => $child_el, |
776 |
|
|
type => 'element not allowed'); |
777 |
|
|
} else { |
778 |
|
|
die "check_child_element: Bad |html| phase: $element_state->{phase}"; |
779 |
|
|
} |
780 |
|
|
}, |
781 |
|
|
check_child_text => sub { |
782 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
783 |
|
|
if ($has_significant) { |
784 |
|
|
$self->{onerror}->(node => $child_node, |
785 |
|
|
type => 'character not allowed'); |
786 |
|
|
} |
787 |
|
|
}, |
788 |
|
|
check_end => sub { |
789 |
|
|
my ($self, $item, $element_state) = @_; |
790 |
|
|
if ($element_state->{phase} eq 'after body') { |
791 |
|
|
# |
792 |
|
|
} elsif ($element_state->{phase} eq 'before head') { |
793 |
|
|
$self->{onerror}->(node => $item->{node}, |
794 |
|
|
type => 'child element missing:head'); |
795 |
|
|
$self->{onerror}->(node => $item->{node}, |
796 |
|
|
type => 'child element missing:body'); |
797 |
|
|
} elsif ($element_state->{phase} eq 'after head') { |
798 |
|
|
$self->{onerror}->(node => $item->{node}, |
799 |
|
|
type => 'child element missing:body'); |
800 |
|
|
} else { |
801 |
|
|
die "check_end: Bad |html| phase: $element_state->{phase}"; |
802 |
|
|
} |
803 |
wakaba |
1.1 |
|
804 |
wakaba |
1.40 |
$HTMLChecker{check_end}->(@_); |
805 |
|
|
}, |
806 |
|
|
}; |
807 |
wakaba |
1.25 |
|
808 |
wakaba |
1.40 |
$Element->{$HTML_NS}->{head} = { |
809 |
|
|
check_attrs => $GetHTMLAttrsChecker->({}), |
810 |
|
|
check_child_element => sub { |
811 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
812 |
|
|
$child_is_transparent, $element_state) = @_; |
813 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
814 |
|
|
$self->{onerror}->(node => $child_el, |
815 |
|
|
type => 'element not allowed:minus', |
816 |
|
|
level => $self->{must_level}); |
817 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
818 |
|
|
# |
819 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'title') { |
820 |
|
|
unless ($element_state->{has_title}) { |
821 |
|
|
$element_state->{has_title} = 1; |
822 |
|
|
} else { |
823 |
|
|
$self->{onerror}->(node => $child_el, |
824 |
|
|
type => 'element not allowed:head title', |
825 |
|
|
level => $self->{must_level}); |
826 |
|
|
} |
827 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') { |
828 |
|
|
if ($child_el->has_attribute_ns (undef, 'scoped')) { |
829 |
|
|
$self->{onerror}->(node => $child_el, |
830 |
|
|
type => 'element not allowed:head style', |
831 |
|
|
level => $self->{must_level}); |
832 |
wakaba |
1.1 |
} |
833 |
wakaba |
1.40 |
} elsif ($HTMLMetadataContent->{$child_nsuri}->{$child_ln}) { |
834 |
|
|
# |
835 |
|
|
|
836 |
|
|
## NOTE: |meta| is a metadata content. However, strictly speaking, |
837 |
|
|
## a |meta| element with none of |charset|, |name|, |
838 |
|
|
## or |http-equiv| attribute is not allowed. It is non-conforming |
839 |
|
|
## anyway. |
840 |
|
|
} else { |
841 |
|
|
$self->{onerror}->(node => $child_el, |
842 |
|
|
type => 'element not allowed:metadata', |
843 |
|
|
level => $self->{must_level}); |
844 |
|
|
} |
845 |
|
|
$element_state->{in_head_original} = $self->{flag}->{in_head}; |
846 |
|
|
$self->{flag}->{in_head} = 1; |
847 |
|
|
}, |
848 |
|
|
check_child_text => sub { |
849 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
850 |
|
|
if ($has_significant) { |
851 |
|
|
$self->{onerror}->(node => $child_node, type => 'character not allowed'); |
852 |
wakaba |
1.1 |
} |
853 |
wakaba |
1.40 |
}, |
854 |
|
|
check_end => sub { |
855 |
|
|
my ($self, $item, $element_state) = @_; |
856 |
|
|
unless ($element_state->{has_title}) { |
857 |
|
|
$self->{onerror}->(node => $item->{node}, |
858 |
|
|
type => 'child element missing:title'); |
859 |
wakaba |
1.1 |
} |
860 |
wakaba |
1.40 |
$self->{flag}->{in_head} = $element_state->{in_head_original}; |
861 |
wakaba |
1.1 |
|
862 |
wakaba |
1.40 |
$HTMLChecker{check_end}->(@_); |
863 |
wakaba |
1.1 |
}, |
864 |
|
|
}; |
865 |
|
|
|
866 |
wakaba |
1.40 |
$Element->{$HTML_NS}->{title} = { |
867 |
|
|
%HTMLTextChecker, |
868 |
|
|
}; |
869 |
wakaba |
1.1 |
|
870 |
wakaba |
1.40 |
$Element->{$HTML_NS}->{base} = { |
871 |
|
|
%HTMLEmptyChecker, |
872 |
|
|
check_attrs => sub { |
873 |
|
|
my ($self, $item, $element_state) = @_; |
874 |
wakaba |
1.1 |
|
875 |
wakaba |
1.40 |
if ($self->{has_base}) { |
876 |
|
|
$self->{onerror}->(node => $item->{node}, |
877 |
|
|
type => 'element not allowed:base'); |
878 |
|
|
} else { |
879 |
|
|
$self->{has_base} = 1; |
880 |
wakaba |
1.29 |
} |
881 |
|
|
|
882 |
wakaba |
1.40 |
my $has_href = $item->{node}->has_attribute_ns (undef, 'href'); |
883 |
|
|
my $has_target = $item->{node}->has_attribute_ns (undef, 'target'); |
884 |
wakaba |
1.14 |
|
885 |
|
|
if ($self->{has_uri_attr} and $has_href) { |
886 |
wakaba |
1.4 |
## ISSUE: Are these examples conforming? |
887 |
|
|
## <head profile="a b c"><base href> (except for |profile|'s |
888 |
|
|
## non-conformance) |
889 |
|
|
## <title xml:base="relative"/><base href/> (maybe it should be) |
890 |
|
|
## <unknown xmlns="relative"/><base href/> (assuming that |
891 |
|
|
## |{relative}:unknown| is allowed before XHTML |base| (unlikely, though)) |
892 |
|
|
## <style>@import 'relative';</style><base href> |
893 |
|
|
## <script>location.href = 'relative';</script><base href> |
894 |
wakaba |
1.14 |
## NOTE: <html manifest=".."><head><base href=""/> is conforming as |
895 |
|
|
## an exception. |
896 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
897 |
wakaba |
1.4 |
type => 'basehref after URI attribute'); |
898 |
|
|
} |
899 |
wakaba |
1.14 |
if ($self->{has_hyperlink_element} and $has_target) { |
900 |
wakaba |
1.4 |
## ISSUE: Are these examples conforming? |
901 |
|
|
## <head><title xlink:href=""/><base target="name"/></head> |
902 |
|
|
## <xbl:xbl>...<svg:a href=""/>...</xbl:xbl><base target="name"/> |
903 |
|
|
## (assuming that |xbl:xbl| is allowed before |base|) |
904 |
|
|
## NOTE: These are non-conformant anyway because of |head|'s content model: |
905 |
|
|
## <link href=""/><base target="name"/> |
906 |
|
|
## <link rel=unknown href=""><base target=name> |
907 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
908 |
wakaba |
1.4 |
type => 'basetarget after hyperlink'); |
909 |
|
|
} |
910 |
|
|
|
911 |
wakaba |
1.14 |
if (not $has_href and not $has_target) { |
912 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
913 |
wakaba |
1.14 |
type => 'attribute missing:href|target'); |
914 |
|
|
} |
915 |
|
|
|
916 |
wakaba |
1.4 |
return $GetHTMLAttrsChecker->({ |
917 |
|
|
href => $HTMLURIAttrChecker, |
918 |
|
|
target => $HTMLTargetAttrChecker, |
919 |
wakaba |
1.40 |
})->($self, $item, $element_state); |
920 |
wakaba |
1.4 |
}, |
921 |
wakaba |
1.1 |
}; |
922 |
|
|
|
923 |
|
|
$Element->{$HTML_NS}->{link} = { |
924 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
925 |
|
|
check_attrs => sub { |
926 |
|
|
my ($self, $item, $element_state) = @_; |
927 |
wakaba |
1.1 |
$GetHTMLAttrsChecker->({ |
928 |
|
|
href => $HTMLURIAttrChecker, |
929 |
wakaba |
1.40 |
rel => sub { $HTMLLinkTypesAttrChecker->(0, $item, @_) }, |
930 |
wakaba |
1.1 |
media => $HTMLMQAttrChecker, |
931 |
|
|
hreflang => $HTMLLanguageTagAttrChecker, |
932 |
|
|
type => $HTMLIMTAttrChecker, |
933 |
|
|
## NOTE: Though |title| has special semantics, |
934 |
|
|
## syntactically same as the |title| as global attribute. |
935 |
wakaba |
1.40 |
})->($self, $item, $element_state); |
936 |
|
|
if ($item->{node}->has_attribute_ns (undef, 'href')) { |
937 |
|
|
$self->{has_hyperlink_element} = 1 if $item->{has_hyperlink_link_type}; |
938 |
wakaba |
1.4 |
} else { |
939 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
940 |
wakaba |
1.1 |
type => 'attribute missing:href'); |
941 |
|
|
} |
942 |
wakaba |
1.40 |
unless ($item->{node}->has_attribute_ns (undef, 'rel')) { |
943 |
|
|
$self->{onerror}->(node => $item->{node}, |
944 |
wakaba |
1.1 |
type => 'attribute missing:rel'); |
945 |
|
|
} |
946 |
|
|
}, |
947 |
|
|
}; |
948 |
|
|
|
949 |
|
|
$Element->{$HTML_NS}->{meta} = { |
950 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
951 |
|
|
check_attrs => sub { |
952 |
|
|
my ($self, $item, $element_state) = @_; |
953 |
wakaba |
1.1 |
my $name_attr; |
954 |
|
|
my $http_equiv_attr; |
955 |
|
|
my $charset_attr; |
956 |
|
|
my $content_attr; |
957 |
wakaba |
1.40 |
for my $attr (@{$item->{node}->attributes}) { |
958 |
wakaba |
1.1 |
my $attr_ns = $attr->namespace_uri; |
959 |
|
|
$attr_ns = '' unless defined $attr_ns; |
960 |
|
|
my $attr_ln = $attr->manakai_local_name; |
961 |
|
|
my $checker; |
962 |
|
|
if ($attr_ns eq '') { |
963 |
|
|
if ($attr_ln eq 'content') { |
964 |
|
|
$content_attr = $attr; |
965 |
|
|
$checker = 1; |
966 |
|
|
} elsif ($attr_ln eq 'name') { |
967 |
|
|
$name_attr = $attr; |
968 |
|
|
$checker = 1; |
969 |
|
|
} elsif ($attr_ln eq 'http-equiv') { |
970 |
|
|
$http_equiv_attr = $attr; |
971 |
|
|
$checker = 1; |
972 |
|
|
} elsif ($attr_ln eq 'charset') { |
973 |
|
|
$charset_attr = $attr; |
974 |
|
|
$checker = 1; |
975 |
|
|
} else { |
976 |
|
|
$checker = $HTMLAttrChecker->{$attr_ln} |
977 |
|
|
|| $AttrChecker->{$attr_ns}->{$attr_ln} |
978 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
979 |
|
|
} |
980 |
|
|
} else { |
981 |
|
|
$checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} |
982 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
983 |
|
|
} |
984 |
|
|
if ($checker) { |
985 |
|
|
$checker->($self, $attr) if ref $checker; |
986 |
|
|
} else { |
987 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
988 |
|
|
type => 'attribute'); |
989 |
|
|
## ISSUE: No comformance createria for unknown attributes in the spec |
990 |
|
|
} |
991 |
|
|
} |
992 |
|
|
|
993 |
|
|
if (defined $name_attr) { |
994 |
|
|
if (defined $http_equiv_attr) { |
995 |
|
|
$self->{onerror}->(node => $http_equiv_attr, |
996 |
|
|
type => 'attribute not allowed'); |
997 |
|
|
} elsif (defined $charset_attr) { |
998 |
|
|
$self->{onerror}->(node => $charset_attr, |
999 |
|
|
type => 'attribute not allowed'); |
1000 |
|
|
} |
1001 |
|
|
my $metadata_name = $name_attr->value; |
1002 |
|
|
my $metadata_value; |
1003 |
|
|
if (defined $content_attr) { |
1004 |
|
|
$metadata_value = $content_attr->value; |
1005 |
|
|
} else { |
1006 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
1007 |
wakaba |
1.1 |
type => 'attribute missing:content'); |
1008 |
|
|
$metadata_value = ''; |
1009 |
|
|
} |
1010 |
|
|
} elsif (defined $http_equiv_attr) { |
1011 |
|
|
if (defined $charset_attr) { |
1012 |
|
|
$self->{onerror}->(node => $charset_attr, |
1013 |
|
|
type => 'attribute not allowed'); |
1014 |
|
|
} |
1015 |
|
|
unless (defined $content_attr) { |
1016 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
1017 |
wakaba |
1.1 |
type => 'attribute missing:content'); |
1018 |
|
|
} |
1019 |
|
|
} elsif (defined $charset_attr) { |
1020 |
|
|
if (defined $content_attr) { |
1021 |
|
|
$self->{onerror}->(node => $content_attr, |
1022 |
|
|
type => 'attribute not allowed'); |
1023 |
|
|
} |
1024 |
|
|
} else { |
1025 |
|
|
if (defined $content_attr) { |
1026 |
|
|
$self->{onerror}->(node => $content_attr, |
1027 |
|
|
type => 'attribute not allowed'); |
1028 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
1029 |
wakaba |
1.1 |
type => 'attribute missing:name|http-equiv'); |
1030 |
|
|
} else { |
1031 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
1032 |
wakaba |
1.1 |
type => 'attribute missing:name|http-equiv|charset'); |
1033 |
|
|
} |
1034 |
|
|
} |
1035 |
|
|
|
1036 |
wakaba |
1.32 |
my $check_charset_decl = sub () { |
1037 |
wakaba |
1.40 |
my $parent = $item->{node}->manakai_parent_element; |
1038 |
wakaba |
1.29 |
if ($parent and $parent eq $parent->owner_document->manakai_head) { |
1039 |
|
|
for my $el (@{$parent->child_nodes}) { |
1040 |
|
|
next unless $el->node_type == 1; # ELEMENT_NODE |
1041 |
wakaba |
1.40 |
unless ($el eq $item->{node}) { |
1042 |
wakaba |
1.29 |
## NOTE: Not the first child element. |
1043 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
1044 |
wakaba |
1.32 |
type => 'element not allowed:meta charset', |
1045 |
|
|
level => $self->{must_level}); |
1046 |
wakaba |
1.29 |
} |
1047 |
|
|
last; |
1048 |
|
|
## NOTE: Entity references are not supported. |
1049 |
|
|
} |
1050 |
|
|
} else { |
1051 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
1052 |
wakaba |
1.32 |
type => 'element not allowed:meta charset', |
1053 |
|
|
level => $self->{must_level}); |
1054 |
wakaba |
1.29 |
} |
1055 |
|
|
|
1056 |
wakaba |
1.40 |
unless ($item->{node}->owner_document->manakai_is_html) { |
1057 |
|
|
$self->{onerror}->(node => $item->{node}, |
1058 |
wakaba |
1.32 |
type => 'in XML:charset', |
1059 |
|
|
level => $self->{must_level}); |
1060 |
wakaba |
1.1 |
} |
1061 |
wakaba |
1.32 |
}; # $check_charset_decl |
1062 |
wakaba |
1.21 |
|
1063 |
wakaba |
1.32 |
my $check_charset = sub ($$) { |
1064 |
|
|
my ($attr, $charset_value) = @_; |
1065 |
wakaba |
1.21 |
## NOTE: Though the case-sensitivility of |charset| attribute value |
1066 |
|
|
## is not explicitly spelled in the HTML5 spec, the Character Set |
1067 |
|
|
## registry of IANA, which is referenced from HTML5 spec, says that |
1068 |
|
|
## charset name is case-insensitive. |
1069 |
|
|
$charset_value =~ tr/A-Z/a-z/; ## NOTE: ASCII Case-insensitive. |
1070 |
|
|
|
1071 |
|
|
require Message::Charset::Info; |
1072 |
|
|
my $charset = $Message::Charset::Info::IANACharset->{$charset_value}; |
1073 |
wakaba |
1.40 |
my $ic = $item->{node}->owner_document->input_encoding; |
1074 |
wakaba |
1.21 |
if (defined $ic) { |
1075 |
|
|
## TODO: Test for this case |
1076 |
|
|
my $ic_charset = $Message::Charset::Info::IANACharset->{$ic}; |
1077 |
|
|
if ($charset ne $ic_charset) { |
1078 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1079 |
wakaba |
1.21 |
type => 'mismatched charset name:'.$ic. |
1080 |
wakaba |
1.32 |
':'.$charset_value, ## TODO: This should be a |value| value. |
1081 |
|
|
level => $self->{must_level}); |
1082 |
wakaba |
1.21 |
} |
1083 |
|
|
} else { |
1084 |
|
|
## NOTE: MUST, but not checkable, since the document is not originally |
1085 |
|
|
## in serialized form (or the parser does not preserve the input |
1086 |
|
|
## encoding information). |
1087 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1088 |
|
|
type => 'mismatched charset name::'.$charset_value, ## TODO: |value| |
1089 |
wakaba |
1.21 |
level => 'unsupported'); |
1090 |
|
|
} |
1091 |
|
|
|
1092 |
|
|
## ISSUE: What is "valid character encoding name"? Syntactically valid? |
1093 |
|
|
## Syntactically valid and registered? What about x-charset names? |
1094 |
|
|
unless (Message::Charset::Info::is_syntactically_valid_iana_charset_name |
1095 |
|
|
($charset_value)) { |
1096 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1097 |
|
|
type => 'charset:syntax error:'.$charset_value, ## TODO |
1098 |
|
|
level => $self->{must_level}); |
1099 |
wakaba |
1.21 |
} |
1100 |
|
|
|
1101 |
|
|
if ($charset) { |
1102 |
|
|
## ISSUE: What is "the preferred name for that encoding" (for a charset |
1103 |
|
|
## with no "preferred MIME name" label)? |
1104 |
|
|
my $charset_status = $charset->{iana_names}->{$charset_value} || 0; |
1105 |
|
|
if (($charset_status & |
1106 |
|
|
Message::Charset::Info::PREFERRED_CHARSET_NAME ()) |
1107 |
|
|
!= Message::Charset::Info::PREFERRED_CHARSET_NAME ()) { |
1108 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1109 |
wakaba |
1.21 |
type => 'charset:not preferred:'. |
1110 |
wakaba |
1.32 |
$charset_value, ## TODO |
1111 |
|
|
level => $self->{must_level}); |
1112 |
wakaba |
1.21 |
} |
1113 |
|
|
if (($charset_status & |
1114 |
|
|
Message::Charset::Info::REGISTERED_CHARSET_NAME ()) |
1115 |
|
|
!= Message::Charset::Info::REGISTERED_CHARSET_NAME ()) { |
1116 |
|
|
if ($charset_value =~ /^x-/) { |
1117 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1118 |
|
|
type => 'charset:private:'.$charset_value, ## TODO |
1119 |
wakaba |
1.21 |
level => $self->{good_level}); |
1120 |
|
|
} else { |
1121 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1122 |
wakaba |
1.21 |
type => 'charset:not registered:'. |
1123 |
wakaba |
1.32 |
$charset_value, ## TODO |
1124 |
wakaba |
1.21 |
level => $self->{good_level}); |
1125 |
|
|
} |
1126 |
|
|
} |
1127 |
|
|
} elsif ($charset_value =~ /^x-/) { |
1128 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1129 |
|
|
type => 'charset:private:'.$charset_value, ## TODO |
1130 |
wakaba |
1.21 |
level => $self->{good_level}); |
1131 |
|
|
} else { |
1132 |
wakaba |
1.32 |
$self->{onerror}->(node => $attr, |
1133 |
|
|
type => 'charset:not registered:'.$charset_value, ## TODO |
1134 |
wakaba |
1.21 |
level => $self->{good_level}); |
1135 |
|
|
} |
1136 |
|
|
|
1137 |
wakaba |
1.32 |
if ($attr->get_user_data ('manakai_has_reference')) { |
1138 |
|
|
$self->{onerror}->(node => $attr, |
1139 |
wakaba |
1.22 |
type => 'character reference in charset', |
1140 |
|
|
level => $self->{must_level}); |
1141 |
|
|
} |
1142 |
wakaba |
1.32 |
}; # $check_charset |
1143 |
|
|
|
1144 |
|
|
## TODO: metadata conformance |
1145 |
|
|
|
1146 |
|
|
## TODO: pragma conformance |
1147 |
|
|
if (defined $http_equiv_attr) { ## An enumerated attribute |
1148 |
|
|
my $keyword = lc $http_equiv_attr->value; ## TODO: ascii case? |
1149 |
|
|
if ({ |
1150 |
|
|
'refresh' => 1, |
1151 |
|
|
'default-style' => 1, |
1152 |
|
|
}->{$keyword}) { |
1153 |
|
|
# |
1154 |
wakaba |
1.33 |
|
1155 |
|
|
## TODO: More than one occurence is a MUST-level error (revision 1180). |
1156 |
wakaba |
1.32 |
} elsif ($keyword eq 'content-type') { |
1157 |
wakaba |
1.33 |
## ISSUE: Though it is renamed as "Encoding declaration" state in rev |
1158 |
|
|
## 1221, there are still many occurence of "Content-Type" state in |
1159 |
|
|
## the spec. |
1160 |
|
|
|
1161 |
wakaba |
1.32 |
$check_charset_decl->(); |
1162 |
|
|
if ($content_attr) { |
1163 |
|
|
my $content = $content_attr->value; |
1164 |
|
|
if ($content =~ m!^text/html;\x20?charset=(.+)\z!s) { |
1165 |
|
|
$check_charset->($content_attr, $1); |
1166 |
|
|
} else { |
1167 |
|
|
$self->{onerror}->(node => $content_attr, |
1168 |
|
|
type => 'meta content-type syntax error', |
1169 |
|
|
level => $self->{must_level}); |
1170 |
|
|
} |
1171 |
|
|
} |
1172 |
|
|
} else { |
1173 |
|
|
$self->{onerror}->(node => $http_equiv_attr, |
1174 |
|
|
type => 'enumerated:invalid'); |
1175 |
|
|
} |
1176 |
|
|
} |
1177 |
|
|
|
1178 |
|
|
if (defined $charset_attr) { |
1179 |
|
|
$check_charset_decl->(); |
1180 |
|
|
$check_charset->($charset_attr, $charset_attr->value); |
1181 |
wakaba |
1.1 |
} |
1182 |
|
|
}, |
1183 |
|
|
}; |
1184 |
|
|
|
1185 |
|
|
$Element->{$HTML_NS}->{style} = { |
1186 |
wakaba |
1.40 |
%HTMLChecker, |
1187 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
1188 |
wakaba |
1.1 |
type => $HTMLIMTAttrChecker, ## TODO: MUST be a styling language |
1189 |
|
|
media => $HTMLMQAttrChecker, |
1190 |
|
|
scoped => $GetHTMLBooleanAttrChecker->('scoped'), |
1191 |
|
|
## NOTE: |title| has special semantics for |style|s, but is syntactically |
1192 |
|
|
## not different |
1193 |
|
|
}), |
1194 |
wakaba |
1.40 |
check_start => sub { |
1195 |
|
|
my ($self, $item, $element_state) = @_; |
1196 |
|
|
|
1197 |
wakaba |
1.27 |
## NOTE: |html:style| itself has no conformance creteria on content model. |
1198 |
wakaba |
1.40 |
my $type = $item->{node}->get_attribute_ns (undef, 'type'); |
1199 |
wakaba |
1.27 |
if (not defined $type or |
1200 |
|
|
$type =~ m[\A(?>(?>\x0D\x0A)?[\x09\x20])*[Tt][Ee][Xx][Tt](?>(?>\x0D\x0A)?[\x09\x20])*/(?>(?>\x0D\x0A)?[\x09\x20])*[Cc][Ss][Ss](?>(?>\x0D\x0A)?[\x09\x20])*\z]) { |
1201 |
wakaba |
1.40 |
$element_state->{allow_element} = 0; |
1202 |
|
|
$element_state->{style_type} = 'text/css'; |
1203 |
|
|
} else { |
1204 |
|
|
$element_state->{allow_element} = 1; # unknown |
1205 |
|
|
$element_state->{style_type} = $type; ## TODO: $type normalization |
1206 |
|
|
} |
1207 |
|
|
}, |
1208 |
|
|
check_child_element => sub { |
1209 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
1210 |
|
|
$child_is_transparent, $element_state) = @_; |
1211 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
1212 |
|
|
$self->{onerror}->(node => $child_el, |
1213 |
|
|
type => 'element not allowed:minus', |
1214 |
|
|
level => $self->{must_level}); |
1215 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
1216 |
|
|
# |
1217 |
|
|
} elsif ($element_state->{allow_element}) { |
1218 |
|
|
# |
1219 |
|
|
} else { |
1220 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
1221 |
|
|
} |
1222 |
|
|
}, |
1223 |
|
|
check_child_text => sub { |
1224 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
1225 |
|
|
$element_state->{text} .= $child_node->text_content; |
1226 |
|
|
}, |
1227 |
|
|
check_end => sub { |
1228 |
|
|
my ($self, $item, $element_state) = @_; |
1229 |
|
|
if ($element_state->{style_type} eq 'text/css') { |
1230 |
|
|
$self->{onsubdoc}->({s => $element_state->{text}, |
1231 |
|
|
container_node => $item->{node}, |
1232 |
wakaba |
1.28 |
media_type => 'text/css', is_char_string => 1}); |
1233 |
wakaba |
1.27 |
} else { |
1234 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, level => 'unsupported', |
1235 |
|
|
type => 'style:'.$element_state->{style_type}); |
1236 |
wakaba |
1.27 |
} |
1237 |
wakaba |
1.40 |
|
1238 |
|
|
$HTMLChecker{check_end}->(@_); |
1239 |
wakaba |
1.1 |
}, |
1240 |
|
|
}; |
1241 |
wakaba |
1.25 |
## ISSUE: Relationship to significant content check? |
1242 |
wakaba |
1.1 |
|
1243 |
|
|
$Element->{$HTML_NS}->{body} = { |
1244 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1245 |
wakaba |
1.1 |
}; |
1246 |
|
|
|
1247 |
|
|
$Element->{$HTML_NS}->{section} = { |
1248 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1249 |
wakaba |
1.1 |
}; |
1250 |
|
|
|
1251 |
|
|
$Element->{$HTML_NS}->{nav} = { |
1252 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1253 |
wakaba |
1.1 |
}; |
1254 |
|
|
|
1255 |
|
|
$Element->{$HTML_NS}->{article} = { |
1256 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1257 |
wakaba |
1.1 |
}; |
1258 |
|
|
|
1259 |
|
|
$Element->{$HTML_NS}->{blockquote} = { |
1260 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1261 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
1262 |
wakaba |
1.1 |
cite => $HTMLURIAttrChecker, |
1263 |
|
|
}), |
1264 |
|
|
}; |
1265 |
|
|
|
1266 |
|
|
$Element->{$HTML_NS}->{aside} = { |
1267 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1268 |
wakaba |
1.1 |
}; |
1269 |
|
|
|
1270 |
|
|
$Element->{$HTML_NS}->{h1} = { |
1271 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1272 |
|
|
check_start => sub { |
1273 |
|
|
my ($self, $item, $element_state) = @_; |
1274 |
|
|
$self->{flag}->{has_hn} = 1; |
1275 |
wakaba |
1.1 |
}, |
1276 |
|
|
}; |
1277 |
|
|
|
1278 |
wakaba |
1.40 |
$Element->{$HTML_NS}->{h2} = {%{$Element->{$HTML_NS}->{h1}}}; |
1279 |
wakaba |
1.1 |
|
1280 |
wakaba |
1.40 |
$Element->{$HTML_NS}->{h3} = {%{$Element->{$HTML_NS}->{h1}}}; |
1281 |
wakaba |
1.1 |
|
1282 |
wakaba |
1.40 |
$Element->{$HTML_NS}->{h4} = {%{$Element->{$HTML_NS}->{h1}}}; |
1283 |
wakaba |
1.1 |
|
1284 |
wakaba |
1.40 |
$Element->{$HTML_NS}->{h5} = {%{$Element->{$HTML_NS}->{h1}}}; |
1285 |
wakaba |
1.1 |
|
1286 |
wakaba |
1.40 |
$Element->{$HTML_NS}->{h6} = {%{$Element->{$HTML_NS}->{h1}}}; |
1287 |
wakaba |
1.1 |
|
1288 |
wakaba |
1.29 |
## TODO: Explicit sectioning is "encouraged". |
1289 |
|
|
|
1290 |
wakaba |
1.1 |
$Element->{$HTML_NS}->{header} = { |
1291 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1292 |
|
|
check_start => sub { |
1293 |
|
|
my ($self, $item, $element_state) = @_; |
1294 |
|
|
$self->_add_minus_elements ($element_state, |
1295 |
|
|
{$HTML_NS => {qw/header 1 footer 1/}}, |
1296 |
|
|
$HTMLSectioningContent); |
1297 |
|
|
$element_state->{has_hn_original} = $self->{flag}->{has_hn}; |
1298 |
|
|
$self->{flag}->{has_hn} = 0; |
1299 |
|
|
}, |
1300 |
|
|
check_end => sub { |
1301 |
|
|
my ($self, $item, $element_state) = @_; |
1302 |
|
|
$self->_remove_minus_elements ($element_state); |
1303 |
|
|
unless ($self->{flag}->{has_hn}) { |
1304 |
|
|
$self->{onerror}->(node => $item->{node}, |
1305 |
|
|
type => 'element missing:hn'); |
1306 |
|
|
} |
1307 |
|
|
$self->{flag}->{has_hn} ||= $element_state->{has_hn_original}; |
1308 |
wakaba |
1.1 |
|
1309 |
wakaba |
1.40 |
$HTMLProseContentChecker{check_end}->(@_); |
1310 |
wakaba |
1.1 |
}, |
1311 |
wakaba |
1.40 |
## ISSUE: <header><del><h1>...</h1></del></header> is conforming? |
1312 |
wakaba |
1.1 |
}; |
1313 |
|
|
|
1314 |
|
|
$Element->{$HTML_NS}->{footer} = { |
1315 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1316 |
|
|
check_start => sub { |
1317 |
|
|
my ($self, $item, $element_state) = @_; |
1318 |
|
|
$self->_add_minus_elements ($element_state, |
1319 |
|
|
{$HTML_NS => {footer => 1}}, |
1320 |
|
|
$HTMLSectioningContent, $HTMLHeadingContent); |
1321 |
|
|
}, |
1322 |
|
|
check_end => sub { |
1323 |
|
|
my ($self, $item, $element_state) = @_; |
1324 |
|
|
$self->_remove_minus_elements ($element_state); |
1325 |
wakaba |
1.1 |
|
1326 |
wakaba |
1.40 |
$HTMLProseContentChecker{check_end}->(@_); |
1327 |
wakaba |
1.1 |
}, |
1328 |
|
|
}; |
1329 |
|
|
|
1330 |
|
|
$Element->{$HTML_NS}->{address} = { |
1331 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1332 |
|
|
check_start => sub { |
1333 |
|
|
my ($self, $item, $element_state) = @_; |
1334 |
|
|
$self->_add_minus_elements ($element_state, |
1335 |
|
|
{$HTML_NS => {footer => 1, address => 1}}, |
1336 |
|
|
$HTMLSectioningContent, $HTMLHeadingContent); |
1337 |
|
|
}, |
1338 |
|
|
check_end => sub { |
1339 |
|
|
my ($self, $item, $element_state) = @_; |
1340 |
|
|
$self->_remove_minus_elements ($element_state); |
1341 |
wakaba |
1.29 |
|
1342 |
wakaba |
1.40 |
$HTMLProseContentChecker{check_end}->(@_); |
1343 |
wakaba |
1.29 |
}, |
1344 |
wakaba |
1.1 |
}; |
1345 |
|
|
|
1346 |
|
|
$Element->{$HTML_NS}->{p} = { |
1347 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1348 |
wakaba |
1.1 |
}; |
1349 |
|
|
|
1350 |
|
|
$Element->{$HTML_NS}->{hr} = { |
1351 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
1352 |
wakaba |
1.1 |
}; |
1353 |
|
|
|
1354 |
|
|
$Element->{$HTML_NS}->{br} = { |
1355 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
1356 |
wakaba |
1.29 |
## NOTE: Blank line MUST NOT be used for presentation purpose. |
1357 |
|
|
## (This requirement is semantic so that we cannot check.) |
1358 |
wakaba |
1.1 |
}; |
1359 |
|
|
|
1360 |
|
|
$Element->{$HTML_NS}->{dialog} = { |
1361 |
wakaba |
1.40 |
%HTMLChecker, |
1362 |
|
|
check_start => sub { |
1363 |
|
|
my ($self, $item, $element_state) = @_; |
1364 |
|
|
$element_state->{phase} = 'before dt'; |
1365 |
|
|
}, |
1366 |
|
|
check_child_element => sub { |
1367 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
1368 |
|
|
$child_is_transparent, $element_state) = @_; |
1369 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
1370 |
|
|
$self->{onerror}->(node => $child_el, |
1371 |
|
|
type => 'element not allowed:minus', |
1372 |
|
|
level => $self->{must_level}); |
1373 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
1374 |
|
|
# |
1375 |
|
|
} elsif ($element_state->{phase} eq 'before dt') { |
1376 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') { |
1377 |
|
|
$element_state->{phase} = 'before dd'; |
1378 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') { |
1379 |
|
|
$self->{onerror} |
1380 |
|
|
->(node => $child_el, type => 'ps element missing:dt'); |
1381 |
|
|
$element_state->{phase} = 'before dt'; |
1382 |
|
|
} else { |
1383 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
1384 |
|
|
} |
1385 |
|
|
} elsif ($element_state->{phase} eq 'before dd') { |
1386 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') { |
1387 |
|
|
$element_state->{phase} = 'before dt'; |
1388 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') { |
1389 |
|
|
$self->{onerror} |
1390 |
|
|
->(node => $child_el, type => 'ps element missing:dd'); |
1391 |
|
|
$element_state->{phase} = 'before dd'; |
1392 |
|
|
} else { |
1393 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
1394 |
wakaba |
1.1 |
} |
1395 |
wakaba |
1.40 |
} else { |
1396 |
|
|
die "check_child_element: Bad |dialog| phase: $element_state->{phase}"; |
1397 |
|
|
} |
1398 |
|
|
}, |
1399 |
|
|
check_child_text => sub { |
1400 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
1401 |
|
|
if ($has_significant) { |
1402 |
|
|
$self->{onerror}->(node => $child_node, type => 'character not allowed'); |
1403 |
wakaba |
1.1 |
} |
1404 |
wakaba |
1.40 |
}, |
1405 |
|
|
check_end => sub { |
1406 |
|
|
my ($self, $item, $element_state) = @_; |
1407 |
|
|
if ($element_state->{phase} eq 'before dd') { |
1408 |
|
|
$self->{onerror}->(node => $item->{node}, |
1409 |
|
|
type => 'child element missing:dd'); |
1410 |
wakaba |
1.1 |
} |
1411 |
wakaba |
1.40 |
|
1412 |
|
|
$HTMLChecker{check_end}->(@_); |
1413 |
wakaba |
1.1 |
}, |
1414 |
|
|
}; |
1415 |
|
|
|
1416 |
|
|
$Element->{$HTML_NS}->{pre} = { |
1417 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1418 |
wakaba |
1.1 |
}; |
1419 |
|
|
|
1420 |
|
|
$Element->{$HTML_NS}->{ol} = { |
1421 |
wakaba |
1.40 |
%HTMLChecker, |
1422 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
1423 |
wakaba |
1.1 |
start => $HTMLIntegerAttrChecker, |
1424 |
|
|
}), |
1425 |
wakaba |
1.40 |
check_child_element => sub { |
1426 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
1427 |
|
|
$child_is_transparent, $element_state) = @_; |
1428 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
1429 |
|
|
$self->{onerror}->(node => $child_el, |
1430 |
|
|
type => 'element not allowed:minus', |
1431 |
|
|
level => $self->{must_level}); |
1432 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
1433 |
|
|
# |
1434 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') { |
1435 |
|
|
# |
1436 |
|
|
} else { |
1437 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
1438 |
wakaba |
1.1 |
} |
1439 |
wakaba |
1.40 |
}, |
1440 |
|
|
check_child_text => sub { |
1441 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
1442 |
|
|
if ($has_significant) { |
1443 |
|
|
$self->{onerror}->(node => $child_node, type => 'character not allowed'); |
1444 |
wakaba |
1.1 |
} |
1445 |
|
|
}, |
1446 |
|
|
}; |
1447 |
|
|
|
1448 |
|
|
$Element->{$HTML_NS}->{ul} = { |
1449 |
wakaba |
1.40 |
%{$Element->{$HTML_NS}->{ol}}, |
1450 |
wakaba |
1.1 |
}; |
1451 |
|
|
|
1452 |
|
|
$Element->{$HTML_NS}->{li} = { |
1453 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1454 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
1455 |
wakaba |
1.1 |
start => sub { |
1456 |
|
|
my ($self, $attr) = @_; |
1457 |
|
|
my $parent = $attr->owner_element->manakai_parent_element; |
1458 |
|
|
if (defined $parent) { |
1459 |
|
|
my $parent_ns = $parent->namespace_uri; |
1460 |
|
|
$parent_ns = '' unless defined $parent_ns; |
1461 |
|
|
my $parent_ln = $parent->manakai_local_name; |
1462 |
|
|
unless ($parent_ns eq $HTML_NS and $parent_ln eq 'ol') { |
1463 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
1464 |
|
|
type => 'attribute'); |
1465 |
|
|
} |
1466 |
|
|
} |
1467 |
|
|
$HTMLIntegerAttrChecker->($self, $attr); |
1468 |
|
|
}, |
1469 |
|
|
}), |
1470 |
wakaba |
1.40 |
check_child_element => sub { |
1471 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
1472 |
|
|
$child_is_transparent, $element_state) = @_; |
1473 |
|
|
if ($self->{flag}->{in_menu}) { |
1474 |
|
|
$HTMLPhrasingContentChecker{check_child_element}->(@_); |
1475 |
|
|
} else { |
1476 |
|
|
$HTMLProseContentChecker{check_child_element}->(@_); |
1477 |
|
|
} |
1478 |
|
|
}, |
1479 |
|
|
check_child_text => sub { |
1480 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
1481 |
|
|
if ($self->{flag}->{in_menu}) { |
1482 |
|
|
$HTMLPhrasingContentChecker{check_child_text}->(@_); |
1483 |
wakaba |
1.1 |
} else { |
1484 |
wakaba |
1.40 |
$HTMLProseContentChecker{check_child_text}->(@_); |
1485 |
wakaba |
1.1 |
} |
1486 |
|
|
}, |
1487 |
|
|
}; |
1488 |
|
|
|
1489 |
|
|
$Element->{$HTML_NS}->{dl} = { |
1490 |
wakaba |
1.40 |
%HTMLChecker, |
1491 |
|
|
check_start => sub { |
1492 |
|
|
my ($self, $item, $element_state) = @_; |
1493 |
|
|
$element_state->{phase} = 'before dt'; |
1494 |
|
|
}, |
1495 |
|
|
check_child_element => sub { |
1496 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
1497 |
|
|
$child_is_transparent, $element_state) = @_; |
1498 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
1499 |
|
|
$self->{onerror}->(node => $child_el, |
1500 |
|
|
type => 'element not allowed:minus', |
1501 |
|
|
level => $self->{must_level}); |
1502 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
1503 |
|
|
# |
1504 |
|
|
} elsif ($element_state->{phase} eq 'in dds') { |
1505 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') { |
1506 |
|
|
#$element_state->{phase} = 'in dds'; |
1507 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') { |
1508 |
|
|
$element_state->{phase} = 'in dts'; |
1509 |
|
|
} else { |
1510 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
1511 |
|
|
} |
1512 |
|
|
} elsif ($element_state->{phase} eq 'in dts') { |
1513 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') { |
1514 |
|
|
#$element_state->{phase} = 'in dts'; |
1515 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') { |
1516 |
|
|
$element_state->{phase} = 'in dds'; |
1517 |
|
|
} else { |
1518 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
1519 |
|
|
} |
1520 |
|
|
} elsif ($element_state->{phase} eq 'before dt') { |
1521 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'dt') { |
1522 |
|
|
$element_state->{phase} = 'in dts'; |
1523 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'dd') { |
1524 |
|
|
$self->{onerror} |
1525 |
|
|
->(node => $child_el, type => 'ps element missing:dt'); |
1526 |
|
|
$element_state->{phase} = 'in dds'; |
1527 |
|
|
} else { |
1528 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
1529 |
wakaba |
1.1 |
} |
1530 |
wakaba |
1.40 |
} else { |
1531 |
|
|
die "check_child_element: Bad |dl| phase: $element_state->{phase}"; |
1532 |
wakaba |
1.1 |
} |
1533 |
wakaba |
1.40 |
}, |
1534 |
|
|
check_child_text => sub { |
1535 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
1536 |
|
|
if ($has_significant) { |
1537 |
|
|
$self->{onerror}->(node => $child_node, type => 'character not allowed'); |
1538 |
|
|
} |
1539 |
|
|
}, |
1540 |
|
|
check_end => sub { |
1541 |
|
|
my ($self, $item, $element_state) = @_; |
1542 |
|
|
if ($element_state->{phase} eq 'in dts') { |
1543 |
|
|
$self->{onerror}->(node => $item->{node}, |
1544 |
|
|
type => 'child element missing:dd'); |
1545 |
wakaba |
1.1 |
} |
1546 |
|
|
|
1547 |
wakaba |
1.40 |
$HTMLChecker{check_end}->(@_); |
1548 |
wakaba |
1.1 |
}, |
1549 |
|
|
}; |
1550 |
|
|
|
1551 |
|
|
$Element->{$HTML_NS}->{dt} = { |
1552 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1553 |
wakaba |
1.1 |
}; |
1554 |
|
|
|
1555 |
|
|
$Element->{$HTML_NS}->{dd} = { |
1556 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1557 |
wakaba |
1.1 |
}; |
1558 |
|
|
|
1559 |
|
|
$Element->{$HTML_NS}->{a} = { |
1560 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1561 |
|
|
check_attrs => sub { |
1562 |
|
|
my ($self, $item, $element_state) = @_; |
1563 |
wakaba |
1.1 |
my %attr; |
1564 |
wakaba |
1.40 |
for my $attr (@{$item->{node}->attributes}) { |
1565 |
wakaba |
1.1 |
my $attr_ns = $attr->namespace_uri; |
1566 |
|
|
$attr_ns = '' unless defined $attr_ns; |
1567 |
|
|
my $attr_ln = $attr->manakai_local_name; |
1568 |
|
|
my $checker; |
1569 |
|
|
if ($attr_ns eq '') { |
1570 |
|
|
$checker = { |
1571 |
|
|
target => $HTMLTargetAttrChecker, |
1572 |
|
|
href => $HTMLURIAttrChecker, |
1573 |
|
|
ping => $HTMLSpaceURIsAttrChecker, |
1574 |
wakaba |
1.40 |
rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) }, |
1575 |
wakaba |
1.1 |
media => $HTMLMQAttrChecker, |
1576 |
|
|
hreflang => $HTMLLanguageTagAttrChecker, |
1577 |
|
|
type => $HTMLIMTAttrChecker, |
1578 |
|
|
}->{$attr_ln}; |
1579 |
|
|
if ($checker) { |
1580 |
|
|
$attr{$attr_ln} = $attr; |
1581 |
|
|
} else { |
1582 |
|
|
$checker = $HTMLAttrChecker->{$attr_ln}; |
1583 |
|
|
} |
1584 |
|
|
} |
1585 |
|
|
$checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} |
1586 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
1587 |
|
|
if ($checker) { |
1588 |
|
|
$checker->($self, $attr) if ref $checker; |
1589 |
|
|
} else { |
1590 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
1591 |
|
|
type => 'attribute'); |
1592 |
|
|
## ISSUE: No comformance createria for unknown attributes in the spec |
1593 |
|
|
} |
1594 |
|
|
} |
1595 |
|
|
|
1596 |
wakaba |
1.40 |
$element_state->{in_a_href_original} = $self->{flag}->{in_a_href}; |
1597 |
wakaba |
1.4 |
if (defined $attr{href}) { |
1598 |
|
|
$self->{has_hyperlink_element} = 1; |
1599 |
wakaba |
1.40 |
$self->{flag}->{in_a_href} = 1; |
1600 |
wakaba |
1.4 |
} else { |
1601 |
wakaba |
1.1 |
for (qw/target ping rel media hreflang type/) { |
1602 |
|
|
if (defined $attr{$_}) { |
1603 |
|
|
$self->{onerror}->(node => $attr{$_}, |
1604 |
|
|
type => 'attribute not allowed'); |
1605 |
|
|
} |
1606 |
|
|
} |
1607 |
|
|
} |
1608 |
|
|
}, |
1609 |
wakaba |
1.40 |
check_start => sub { |
1610 |
|
|
my ($self, $item, $element_state) = @_; |
1611 |
|
|
$self->_add_minus_elements ($element_state, $HTMLInteractiveContent); |
1612 |
|
|
}, |
1613 |
|
|
check_end => sub { |
1614 |
|
|
my ($self, $item, $element_state) = @_; |
1615 |
|
|
$self->_remove_minus_elements ($element_state); |
1616 |
wakaba |
1.1 |
|
1617 |
wakaba |
1.40 |
$HTMLPhrasingContentChecker{check_end}->(@_); |
1618 |
wakaba |
1.1 |
}, |
1619 |
|
|
}; |
1620 |
|
|
|
1621 |
|
|
$Element->{$HTML_NS}->{q} = { |
1622 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1623 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
1624 |
wakaba |
1.1 |
cite => $HTMLURIAttrChecker, |
1625 |
|
|
}), |
1626 |
|
|
}; |
1627 |
|
|
|
1628 |
|
|
$Element->{$HTML_NS}->{cite} = { |
1629 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1630 |
wakaba |
1.1 |
}; |
1631 |
|
|
|
1632 |
|
|
$Element->{$HTML_NS}->{em} = { |
1633 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1634 |
wakaba |
1.1 |
}; |
1635 |
|
|
|
1636 |
|
|
$Element->{$HTML_NS}->{strong} = { |
1637 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1638 |
wakaba |
1.1 |
}; |
1639 |
|
|
|
1640 |
|
|
$Element->{$HTML_NS}->{small} = { |
1641 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1642 |
wakaba |
1.1 |
}; |
1643 |
|
|
|
1644 |
wakaba |
1.38 |
$Element->{$HTML_NS}->{mark} = { |
1645 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1646 |
wakaba |
1.1 |
}; |
1647 |
|
|
|
1648 |
|
|
$Element->{$HTML_NS}->{dfn} = { |
1649 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1650 |
|
|
check_start => sub { |
1651 |
|
|
my ($self, $item, $element_state) = @_; |
1652 |
|
|
$self->_add_minus_elements ($element_state, {$HTML_NS => {dfn => 1}}); |
1653 |
wakaba |
1.1 |
|
1654 |
wakaba |
1.40 |
my $node = $item->{node}; |
1655 |
wakaba |
1.1 |
my $term = $node->get_attribute_ns (undef, 'title'); |
1656 |
|
|
unless (defined $term) { |
1657 |
|
|
for my $child (@{$node->child_nodes}) { |
1658 |
|
|
if ($child->node_type == 1) { # ELEMENT_NODE |
1659 |
|
|
if (defined $term) { |
1660 |
|
|
undef $term; |
1661 |
|
|
last; |
1662 |
|
|
} elsif ($child->manakai_local_name eq 'abbr') { |
1663 |
|
|
my $nsuri = $child->namespace_uri; |
1664 |
|
|
if (defined $nsuri and $nsuri eq $HTML_NS) { |
1665 |
|
|
my $attr = $child->get_attribute_node_ns (undef, 'title'); |
1666 |
|
|
if ($attr) { |
1667 |
|
|
$term = $attr->value; |
1668 |
|
|
} |
1669 |
|
|
} |
1670 |
|
|
} |
1671 |
|
|
} elsif ($child->node_type == 3 or $child->node_type == 4) { |
1672 |
|
|
## TEXT_NODE or CDATA_SECTION_NODE |
1673 |
|
|
if ($child->data =~ /\A[\x09-\x0D\x20]+\z/) { # Inter-element whitespace |
1674 |
|
|
next; |
1675 |
|
|
} |
1676 |
|
|
undef $term; |
1677 |
|
|
last; |
1678 |
|
|
} |
1679 |
|
|
} |
1680 |
|
|
unless (defined $term) { |
1681 |
|
|
$term = $node->text_content; |
1682 |
|
|
} |
1683 |
|
|
} |
1684 |
|
|
if ($self->{term}->{$term}) { |
1685 |
|
|
$self->{onerror}->(node => $node, type => 'duplicate term'); |
1686 |
|
|
push @{$self->{term}->{$term}}, $node; |
1687 |
|
|
} else { |
1688 |
|
|
$self->{term}->{$term} = [$node]; |
1689 |
|
|
} |
1690 |
|
|
## ISSUE: The HTML5 algorithm does not work with |ruby| unless |dfn| |
1691 |
|
|
## has |title|. |
1692 |
wakaba |
1.40 |
}, |
1693 |
|
|
check_end => sub { |
1694 |
|
|
my ($self, $item, $element_state) = @_; |
1695 |
|
|
$self->_remove_minus_elements ($element_state); |
1696 |
wakaba |
1.1 |
|
1697 |
wakaba |
1.40 |
$HTMLPhrasingContentChecker{check_end}->(@_); |
1698 |
wakaba |
1.1 |
}, |
1699 |
|
|
}; |
1700 |
|
|
|
1701 |
|
|
$Element->{$HTML_NS}->{abbr} = { |
1702 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1703 |
wakaba |
1.1 |
}; |
1704 |
|
|
|
1705 |
|
|
$Element->{$HTML_NS}->{time} = { |
1706 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1707 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
1708 |
wakaba |
1.1 |
datetime => sub { 1 }, # checked in |checker| |
1709 |
|
|
}), |
1710 |
|
|
## TODO: Write tests |
1711 |
wakaba |
1.40 |
check_end => sub { |
1712 |
|
|
my ($self, $item, $element_state) = @_; |
1713 |
wakaba |
1.1 |
|
1714 |
wakaba |
1.40 |
my $attr = $item->{node}->get_attribute_node_ns (undef, 'datetime'); |
1715 |
wakaba |
1.1 |
my $input; |
1716 |
|
|
my $reg_sp; |
1717 |
|
|
my $input_node; |
1718 |
|
|
if ($attr) { |
1719 |
|
|
$input = $attr->value; |
1720 |
|
|
$reg_sp = qr/[\x09-\x0D\x20]*/; |
1721 |
|
|
$input_node = $attr; |
1722 |
|
|
} else { |
1723 |
wakaba |
1.40 |
$input = $item->{node}->text_content; |
1724 |
wakaba |
1.1 |
$reg_sp = qr/\p{Zs}*/; |
1725 |
wakaba |
1.40 |
$input_node = $item->{node}; |
1726 |
wakaba |
1.1 |
|
1727 |
|
|
## ISSUE: What is the definition for "successfully extracts a date |
1728 |
|
|
## or time"? If the algorithm says the string is invalid but |
1729 |
|
|
## return some date or time, is it "successfully"? |
1730 |
|
|
} |
1731 |
|
|
|
1732 |
|
|
my $hour; |
1733 |
|
|
my $minute; |
1734 |
|
|
my $second; |
1735 |
|
|
if ($input =~ / |
1736 |
|
|
\A |
1737 |
|
|
[\x09-\x0D\x20]* |
1738 |
|
|
([0-9]+) # 1 |
1739 |
|
|
(?> |
1740 |
|
|
-([0-9]+) # 2 |
1741 |
|
|
-([0-9]+) # 3 |
1742 |
|
|
[\x09-\x0D\x20]* |
1743 |
|
|
(?> |
1744 |
|
|
T |
1745 |
|
|
[\x09-\x0D\x20]* |
1746 |
|
|
)? |
1747 |
|
|
([0-9]+) # 4 |
1748 |
|
|
:([0-9]+) # 5 |
1749 |
|
|
(?> |
1750 |
|
|
:([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 6 |
1751 |
|
|
)? |
1752 |
|
|
[\x09-\x0D\x20]* |
1753 |
|
|
(?> |
1754 |
|
|
Z |
1755 |
|
|
[\x09-\x0D\x20]* |
1756 |
|
|
| |
1757 |
|
|
[+-]([0-9]+):([0-9]+) # 7, 8 |
1758 |
|
|
[\x09-\x0D\x20]* |
1759 |
|
|
)? |
1760 |
|
|
\z |
1761 |
|
|
| |
1762 |
|
|
:([0-9]+) # 9 |
1763 |
|
|
(?> |
1764 |
|
|
:([0-9]+(?>\.[0-9]*)?|\.[0-9]*) # 10 |
1765 |
|
|
)? |
1766 |
|
|
[\x09-\x0D\x20]*\z |
1767 |
|
|
) |
1768 |
|
|
/x) { |
1769 |
|
|
if (defined $2) { ## YYYY-MM-DD T? hh:mm |
1770 |
|
|
if (length $1 != 4 or length $2 != 2 or length $3 != 2 or |
1771 |
|
|
length $4 != 2 or length $5 != 2) { |
1772 |
|
|
$self->{onerror}->(node => $input_node, |
1773 |
|
|
type => 'dateortime:syntax error'); |
1774 |
|
|
} |
1775 |
|
|
|
1776 |
|
|
if (1 <= $2 and $2 <= 12) { |
1777 |
|
|
$self->{onerror}->(node => $input_node, type => 'datetime:bad day') |
1778 |
|
|
if $3 < 1 or |
1779 |
|
|
$3 > [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]->[$2]; |
1780 |
|
|
$self->{onerror}->(node => $input_node, type => 'datetime:bad day') |
1781 |
|
|
if $2 == 2 and $3 == 29 and |
1782 |
|
|
not ($1 % 400 == 0 or ($1 % 4 == 0 and $1 % 100 != 0)); |
1783 |
|
|
} else { |
1784 |
|
|
$self->{onerror}->(node => $input_node, |
1785 |
|
|
type => 'datetime:bad month'); |
1786 |
|
|
} |
1787 |
|
|
|
1788 |
|
|
($hour, $minute, $second) = ($4, $5, $6); |
1789 |
|
|
|
1790 |
|
|
if (defined $7) { ## [+-]hh:mm |
1791 |
|
|
if (length $7 != 2 or length $8 != 2) { |
1792 |
|
|
$self->{onerror}->(node => $input_node, |
1793 |
|
|
type => 'dateortime:syntax error'); |
1794 |
|
|
} |
1795 |
|
|
|
1796 |
|
|
$self->{onerror}->(node => $input_node, |
1797 |
|
|
type => 'datetime:bad timezone hour') |
1798 |
|
|
if $7 > 23; |
1799 |
|
|
$self->{onerror}->(node => $input_node, |
1800 |
|
|
type => 'datetime:bad timezone minute') |
1801 |
|
|
if $8 > 59; |
1802 |
|
|
} |
1803 |
|
|
} else { ## hh:mm |
1804 |
|
|
if (length $1 != 2 or length $9 != 2) { |
1805 |
|
|
$self->{onerror}->(node => $input_node, |
1806 |
|
|
type => qq'dateortime:syntax error'); |
1807 |
|
|
} |
1808 |
|
|
|
1809 |
|
|
($hour, $minute, $second) = ($1, $9, $10); |
1810 |
|
|
} |
1811 |
|
|
|
1812 |
|
|
$self->{onerror}->(node => $input_node, type => 'datetime:bad hour') |
1813 |
|
|
if $hour > 23; |
1814 |
|
|
$self->{onerror}->(node => $input_node, type => 'datetime:bad minute') |
1815 |
|
|
if $minute > 59; |
1816 |
|
|
|
1817 |
|
|
if (defined $second) { ## s |
1818 |
|
|
## NOTE: Integer part of second don't have to have length of two. |
1819 |
|
|
|
1820 |
|
|
if (substr ($second, 0, 1) eq '.') { |
1821 |
|
|
$self->{onerror}->(node => $input_node, |
1822 |
|
|
type => 'dateortime:syntax error'); |
1823 |
|
|
} |
1824 |
|
|
|
1825 |
|
|
$self->{onerror}->(node => $input_node, type => 'datetime:bad second') |
1826 |
|
|
if $second >= 60; |
1827 |
|
|
} |
1828 |
|
|
} else { |
1829 |
|
|
$self->{onerror}->(node => $input_node, |
1830 |
|
|
type => 'dateortime:syntax error'); |
1831 |
|
|
} |
1832 |
|
|
|
1833 |
wakaba |
1.40 |
$HTMLPhrasingContentChecker{check_end}->(@_); |
1834 |
wakaba |
1.1 |
}, |
1835 |
|
|
}; |
1836 |
|
|
|
1837 |
|
|
$Element->{$HTML_NS}->{meter} = { ## TODO: "The recommended way of giving the value is to include it as contents of the element" |
1838 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1839 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
1840 |
wakaba |
1.1 |
value => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
1841 |
|
|
min => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
1842 |
|
|
low => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
1843 |
|
|
high => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
1844 |
|
|
max => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
1845 |
|
|
optimum => $GetHTMLFloatingPointNumberAttrChecker->(sub { 1 }), |
1846 |
|
|
}), |
1847 |
|
|
}; |
1848 |
|
|
|
1849 |
|
|
$Element->{$HTML_NS}->{progress} = { ## TODO: recommended to use content |
1850 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1851 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
1852 |
wakaba |
1.1 |
value => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift >= 0 }), |
1853 |
|
|
max => $GetHTMLFloatingPointNumberAttrChecker->(sub { shift > 0 }), |
1854 |
|
|
}), |
1855 |
|
|
}; |
1856 |
|
|
|
1857 |
|
|
$Element->{$HTML_NS}->{code} = { |
1858 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1859 |
wakaba |
1.1 |
}; |
1860 |
|
|
|
1861 |
|
|
$Element->{$HTML_NS}->{var} = { |
1862 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1863 |
wakaba |
1.1 |
}; |
1864 |
|
|
|
1865 |
|
|
$Element->{$HTML_NS}->{samp} = { |
1866 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1867 |
wakaba |
1.1 |
}; |
1868 |
|
|
|
1869 |
|
|
$Element->{$HTML_NS}->{kbd} = { |
1870 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1871 |
wakaba |
1.1 |
}; |
1872 |
|
|
|
1873 |
|
|
$Element->{$HTML_NS}->{sub} = { |
1874 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1875 |
wakaba |
1.1 |
}; |
1876 |
|
|
|
1877 |
|
|
$Element->{$HTML_NS}->{sup} = { |
1878 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1879 |
wakaba |
1.1 |
}; |
1880 |
|
|
|
1881 |
|
|
$Element->{$HTML_NS}->{span} = { |
1882 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1883 |
wakaba |
1.1 |
}; |
1884 |
|
|
|
1885 |
|
|
$Element->{$HTML_NS}->{i} = { |
1886 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1887 |
wakaba |
1.1 |
}; |
1888 |
|
|
|
1889 |
|
|
$Element->{$HTML_NS}->{b} = { |
1890 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1891 |
wakaba |
1.1 |
}; |
1892 |
|
|
|
1893 |
|
|
$Element->{$HTML_NS}->{bdo} = { |
1894 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
1895 |
|
|
check_attrs => sub { |
1896 |
|
|
my ($self, $item, $element_state) = @_; |
1897 |
|
|
$GetHTMLAttrsChecker->({})->($self, $item, $element_state); |
1898 |
|
|
unless ($item->{node}->has_attribute_ns (undef, 'dir')) { |
1899 |
|
|
$self->{onerror}->(node => $item->{node}, |
1900 |
|
|
type => 'attribute missing:dir'); |
1901 |
wakaba |
1.1 |
} |
1902 |
|
|
}, |
1903 |
|
|
## ISSUE: The spec does not directly say that |dir| is a enumerated attr. |
1904 |
|
|
}; |
1905 |
|
|
|
1906 |
wakaba |
1.29 |
=pod |
1907 |
|
|
|
1908 |
|
|
## TODO: |
1909 |
|
|
|
1910 |
|
|
+ |
1911 |
|
|
+ <p>Partly because of the confusion described above, authors are |
1912 |
|
|
+ strongly recommended to always mark up all paragraphs with the |
1913 |
|
|
+ <code>p</code> element, and to not have any <code>ins</code> or |
1914 |
|
|
+ <code>del</code> elements that cross across any <span |
1915 |
|
|
+ title="paragraph">implied paragraphs</span>.</p> |
1916 |
|
|
+ |
1917 |
|
|
(An informative note) |
1918 |
|
|
|
1919 |
|
|
<p><code>ins</code> elements should not cross <span |
1920 |
|
|
+ title="paragraph">implied paragraph</span> boundaries.</p> |
1921 |
|
|
(normative) |
1922 |
|
|
|
1923 |
|
|
+ <p><code>del</code> elements should not cross <span |
1924 |
|
|
+ title="paragraph">implied paragraph</span> boundaries.</p> |
1925 |
|
|
(normative) |
1926 |
|
|
|
1927 |
|
|
=cut |
1928 |
|
|
|
1929 |
wakaba |
1.1 |
$Element->{$HTML_NS}->{ins} = { |
1930 |
wakaba |
1.40 |
%HTMLTransparentChecker, |
1931 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
1932 |
wakaba |
1.1 |
cite => $HTMLURIAttrChecker, |
1933 |
|
|
datetime => $HTMLDatetimeAttrChecker, |
1934 |
|
|
}), |
1935 |
|
|
}; |
1936 |
|
|
|
1937 |
|
|
$Element->{$HTML_NS}->{del} = { |
1938 |
wakaba |
1.40 |
%HTMLTransparentChecker, |
1939 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
1940 |
wakaba |
1.1 |
cite => $HTMLURIAttrChecker, |
1941 |
|
|
datetime => $HTMLDatetimeAttrChecker, |
1942 |
|
|
}), |
1943 |
wakaba |
1.40 |
check_end => sub { |
1944 |
|
|
my ($self, $item, $element_state) = @_; |
1945 |
|
|
if ($element_state->{has_significant}) { |
1946 |
|
|
## NOTE: Significantness flag does not propagate. |
1947 |
|
|
} elsif ($item->{transparent}) { |
1948 |
|
|
# |
1949 |
|
|
} else { |
1950 |
|
|
$self->{onerror}->(node => $item->{node}, |
1951 |
|
|
level => $self->{should_level}, |
1952 |
|
|
type => 'no significant content'); |
1953 |
|
|
} |
1954 |
wakaba |
1.1 |
}, |
1955 |
|
|
}; |
1956 |
|
|
|
1957 |
wakaba |
1.35 |
$Element->{$HTML_NS}->{figure} = { |
1958 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
1959 |
wakaba |
1.41 |
## NOTE: legend, Prose | Prose, legend |
1960 |
|
|
check_child_element => sub { |
1961 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
1962 |
|
|
$child_is_transparent, $element_state) = @_; |
1963 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
1964 |
|
|
$self->{onerror}->(node => $child_el, |
1965 |
|
|
type => 'element not allowed:minus', |
1966 |
|
|
level => $self->{must_level}); |
1967 |
|
|
$element_state->{has_non_legend} = 1; |
1968 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
1969 |
|
|
# |
1970 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'legend') { |
1971 |
|
|
if ($element_state->{has_legend_at_first}) { |
1972 |
|
|
$self->{onerror}->(node => $child_el, |
1973 |
|
|
type => 'element not allowed:figure legend', |
1974 |
|
|
level => $self->{must_level}); |
1975 |
|
|
} elsif ($element_state->{has_legend}) { |
1976 |
|
|
$self->{onerror}->(node => $element_state->{has_legend}, |
1977 |
|
|
type => 'element not allowed:figure legend', |
1978 |
|
|
level => $self->{must_level}); |
1979 |
|
|
$element_state->{has_legend} = $child_el; |
1980 |
|
|
} elsif ($element_state->{has_non_legend}) { |
1981 |
|
|
$element_state->{has_legend} = $child_el; |
1982 |
|
|
} else { |
1983 |
|
|
$element_state->{has_legend_at_first} = 1; |
1984 |
wakaba |
1.35 |
} |
1985 |
wakaba |
1.41 |
delete $element_state->{has_non_legend}; |
1986 |
|
|
} else { |
1987 |
|
|
$HTMLProseContentChecker{check_child_element}->(@_); |
1988 |
|
|
$element_state->{has_non_legend} = 1; |
1989 |
|
|
} |
1990 |
|
|
}, |
1991 |
|
|
check_child_text => sub { |
1992 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
1993 |
|
|
if ($has_significant) { |
1994 |
|
|
$element_state->{has_non_legend} = 1; |
1995 |
wakaba |
1.35 |
} |
1996 |
wakaba |
1.41 |
}, |
1997 |
|
|
check_end => sub { |
1998 |
|
|
my ($self, $item, $element_state) = @_; |
1999 |
wakaba |
1.35 |
|
2000 |
wakaba |
1.41 |
if ($element_state->{has_legend_at_first}) { |
2001 |
|
|
# |
2002 |
|
|
} elsif ($element_state->{has_legend}) { |
2003 |
|
|
if ($element_state->{has_non_legend}) { |
2004 |
|
|
$self->{onerror}->(node => $element_state->{has_legend}, |
2005 |
wakaba |
1.35 |
type => 'element not allowed:figure legend', |
2006 |
|
|
level => $self->{must_level}); |
2007 |
|
|
} |
2008 |
|
|
} else { |
2009 |
wakaba |
1.41 |
$self->{onerror}->(node => $item->{node}, |
2010 |
wakaba |
1.35 |
type => 'element missing:legend', |
2011 |
|
|
level => $self->{must_level}); |
2012 |
|
|
} |
2013 |
wakaba |
1.41 |
|
2014 |
|
|
$HTMLProseContentChecker{check_end}->(@_); |
2015 |
|
|
## ISSUE: |<figure><legend>aa</legend></figure>| should be an error? |
2016 |
wakaba |
1.35 |
}, |
2017 |
|
|
}; |
2018 |
wakaba |
1.8 |
## TODO: Test for <nest/> in <figure/> |
2019 |
wakaba |
1.1 |
|
2020 |
|
|
$Element->{$HTML_NS}->{img} = { |
2021 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
2022 |
|
|
check_attrs => sub { |
2023 |
|
|
my ($self, $item, $element_state) = @_; |
2024 |
wakaba |
1.1 |
$GetHTMLAttrsChecker->({ |
2025 |
|
|
alt => sub { }, ## NOTE: No syntactical requirement |
2026 |
|
|
src => $HTMLURIAttrChecker, |
2027 |
|
|
usemap => $HTMLUsemapAttrChecker, |
2028 |
|
|
ismap => sub { |
2029 |
wakaba |
1.40 |
my ($self, $attr, $parent_item) = @_; |
2030 |
|
|
if (not $self->{flag}->{in_a_href}) { |
2031 |
wakaba |
1.15 |
$self->{onerror}->(node => $attr, |
2032 |
|
|
type => 'attribute not allowed:ismap'); |
2033 |
wakaba |
1.1 |
} |
2034 |
wakaba |
1.40 |
$GetHTMLBooleanAttrChecker->('ismap')->($self, $attr, $parent_item); |
2035 |
wakaba |
1.1 |
}, |
2036 |
|
|
## TODO: height |
2037 |
|
|
## TODO: width |
2038 |
wakaba |
1.40 |
})->($self, $item); |
2039 |
|
|
unless ($item->{node}->has_attribute_ns (undef, 'alt')) { |
2040 |
|
|
$self->{onerror}->(node => $item->{node}, |
2041 |
wakaba |
1.37 |
type => 'attribute missing:alt', |
2042 |
|
|
level => $self->{should_level}); |
2043 |
wakaba |
1.1 |
} |
2044 |
wakaba |
1.40 |
unless ($item->{node}->has_attribute_ns (undef, 'src')) { |
2045 |
|
|
$self->{onerror}->(node => $item->{node}, |
2046 |
|
|
type => 'attribute missing:src'); |
2047 |
wakaba |
1.1 |
} |
2048 |
|
|
}, |
2049 |
|
|
}; |
2050 |
|
|
|
2051 |
|
|
$Element->{$HTML_NS}->{iframe} = { |
2052 |
wakaba |
1.40 |
%HTMLTextChecker, |
2053 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2054 |
wakaba |
1.1 |
src => $HTMLURIAttrChecker, |
2055 |
|
|
}), |
2056 |
wakaba |
1.40 |
}; |
2057 |
|
|
|
2058 |
wakaba |
1.1 |
$Element->{$HTML_NS}->{embed} = { |
2059 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
2060 |
|
|
check_attrs => sub { |
2061 |
|
|
my ($self, $item, $element_state) = @_; |
2062 |
wakaba |
1.1 |
my $has_src; |
2063 |
wakaba |
1.40 |
for my $attr (@{$item->{node}->attributes}) { |
2064 |
wakaba |
1.1 |
my $attr_ns = $attr->namespace_uri; |
2065 |
|
|
$attr_ns = '' unless defined $attr_ns; |
2066 |
|
|
my $attr_ln = $attr->manakai_local_name; |
2067 |
|
|
my $checker; |
2068 |
|
|
if ($attr_ns eq '') { |
2069 |
|
|
if ($attr_ln eq 'src') { |
2070 |
|
|
$checker = $HTMLURIAttrChecker; |
2071 |
|
|
$has_src = 1; |
2072 |
|
|
} elsif ($attr_ln eq 'type') { |
2073 |
|
|
$checker = $HTMLIMTAttrChecker; |
2074 |
|
|
} else { |
2075 |
|
|
## TODO: height |
2076 |
|
|
## TODO: width |
2077 |
|
|
$checker = $HTMLAttrChecker->{$attr_ln} |
2078 |
|
|
|| sub { }; ## NOTE: Any local attribute is ok. |
2079 |
|
|
} |
2080 |
|
|
} |
2081 |
|
|
$checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} |
2082 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
2083 |
|
|
if ($checker) { |
2084 |
|
|
$checker->($self, $attr); |
2085 |
|
|
} else { |
2086 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
2087 |
|
|
type => 'attribute'); |
2088 |
|
|
## ISSUE: No comformance createria for global attributes in the spec |
2089 |
|
|
} |
2090 |
|
|
} |
2091 |
|
|
|
2092 |
|
|
unless ($has_src) { |
2093 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
2094 |
wakaba |
1.1 |
type => 'attribute missing:src'); |
2095 |
|
|
} |
2096 |
|
|
}, |
2097 |
|
|
}; |
2098 |
|
|
|
2099 |
|
|
$Element->{$HTML_NS}->{object} = { |
2100 |
wakaba |
1.40 |
%HTMLTransparentChecker, |
2101 |
|
|
check_attrs => sub { |
2102 |
|
|
my ($self, $item, $element_state) = @_; |
2103 |
wakaba |
1.1 |
$GetHTMLAttrsChecker->({ |
2104 |
|
|
data => $HTMLURIAttrChecker, |
2105 |
|
|
type => $HTMLIMTAttrChecker, |
2106 |
|
|
usemap => $HTMLUsemapAttrChecker, |
2107 |
|
|
## TODO: width |
2108 |
|
|
## TODO: height |
2109 |
wakaba |
1.40 |
})->($self, $item); |
2110 |
|
|
unless ($item->{node}->has_attribute_ns (undef, 'data')) { |
2111 |
|
|
unless ($item->{node}->has_attribute_ns (undef, 'type')) { |
2112 |
|
|
$self->{onerror}->(node => $item->{node}, |
2113 |
wakaba |
1.1 |
type => 'attribute missing:data|type'); |
2114 |
|
|
} |
2115 |
|
|
} |
2116 |
|
|
}, |
2117 |
wakaba |
1.41 |
## NOTE: param*, transparent (Prose) |
2118 |
|
|
check_child_element => sub { |
2119 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
2120 |
|
|
$child_is_transparent, $element_state) = @_; |
2121 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
2122 |
|
|
$self->{onerror}->(node => $child_el, |
2123 |
|
|
type => 'element not allowed:minus', |
2124 |
|
|
level => $self->{must_level}); |
2125 |
|
|
$element_state->{has_non_legend} = 1; |
2126 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
2127 |
|
|
# |
2128 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'param') { |
2129 |
|
|
if ($element_state->{has_non_param}) { |
2130 |
|
|
$self->{onerror}->(node => $child_el, |
2131 |
|
|
type => 'element not allowed:prose', |
2132 |
|
|
level => $self->{must_level}); |
2133 |
wakaba |
1.39 |
} |
2134 |
wakaba |
1.41 |
} else { |
2135 |
|
|
$HTMLProseContentChecker{check_child_element}->(@_); |
2136 |
|
|
$element_state->{has_non_param} = 1; |
2137 |
wakaba |
1.39 |
} |
2138 |
wakaba |
1.25 |
}, |
2139 |
wakaba |
1.41 |
check_child_text => sub { |
2140 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
2141 |
|
|
if ($has_significant) { |
2142 |
|
|
$element_state->{has_non_param} = 1; |
2143 |
|
|
} |
2144 |
|
|
}, |
2145 |
wakaba |
1.8 |
## TODO: Tests for <nest/> in <object/> |
2146 |
wakaba |
1.1 |
}; |
2147 |
wakaba |
1.41 |
## ISSUE: Is |<menu><object data><li>aa</li></object></menu>| conforming? |
2148 |
|
|
## What about |<section><object data><style scoped></style>x</object></section>|? |
2149 |
|
|
## |<section><ins></ins><object data><style scoped></style>x</object></section>|? |
2150 |
wakaba |
1.1 |
|
2151 |
|
|
$Element->{$HTML_NS}->{param} = { |
2152 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
2153 |
|
|
check_attrs => sub { |
2154 |
|
|
my ($self, $item, $element_state) = @_; |
2155 |
wakaba |
1.1 |
$GetHTMLAttrsChecker->({ |
2156 |
|
|
name => sub { }, |
2157 |
|
|
value => sub { }, |
2158 |
wakaba |
1.40 |
})->($self, $item); |
2159 |
|
|
unless ($item->{node}->has_attribute_ns (undef, 'name')) { |
2160 |
|
|
$self->{onerror}->(node => $item->{node}, |
2161 |
wakaba |
1.1 |
type => 'attribute missing:name'); |
2162 |
|
|
} |
2163 |
wakaba |
1.40 |
unless ($item->{node}->has_attribute_ns (undef, 'value')) { |
2164 |
|
|
$self->{onerror}->(node => $item->{node}, |
2165 |
wakaba |
1.1 |
type => 'attribute missing:value'); |
2166 |
|
|
} |
2167 |
|
|
}, |
2168 |
|
|
}; |
2169 |
|
|
|
2170 |
|
|
$Element->{$HTML_NS}->{video} = { |
2171 |
wakaba |
1.40 |
%HTMLTransparentChecker, |
2172 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2173 |
wakaba |
1.1 |
src => $HTMLURIAttrChecker, |
2174 |
|
|
## TODO: start, loopstart, loopend, end |
2175 |
|
|
## ISSUE: they MUST be "value time offset"s. Value? |
2176 |
wakaba |
1.11 |
## ISSUE: playcount has no conformance creteria |
2177 |
wakaba |
1.1 |
autoplay => $GetHTMLBooleanAttrChecker->('autoplay'), |
2178 |
|
|
controls => $GetHTMLBooleanAttrChecker->('controls'), |
2179 |
wakaba |
1.11 |
poster => $HTMLURIAttrChecker, ## TODO: not for audio! |
2180 |
|
|
## TODO: width, height (not for audio!) |
2181 |
wakaba |
1.1 |
}), |
2182 |
wakaba |
1.40 |
|
2183 |
|
|
## TODO: reimplement |
2184 |
wakaba |
1.1 |
checker => sub { |
2185 |
|
|
my ($self, $todo) = @_; |
2186 |
wakaba |
1.25 |
$todo->{flag}->{has_descendant}->{significant} = 1; |
2187 |
wakaba |
1.1 |
|
2188 |
wakaba |
1.29 |
## TODO: |
2189 |
wakaba |
1.1 |
if ($todo->{node}->has_attribute_ns (undef, 'src')) { |
2190 |
wakaba |
1.40 |
# return $HTMLProseContentChecker->($self, $todo); |
2191 |
wakaba |
1.1 |
} else { |
2192 |
wakaba |
1.40 |
# return $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'source') |
2193 |
|
|
# ->($self, $todo); |
2194 |
wakaba |
1.1 |
} |
2195 |
|
|
}, |
2196 |
|
|
}; |
2197 |
|
|
|
2198 |
|
|
$Element->{$HTML_NS}->{audio} = { |
2199 |
wakaba |
1.40 |
%{$Element->{$HTML_NS}->{video}}, |
2200 |
|
|
## TODO: Is there audio-only attribute? |
2201 |
wakaba |
1.1 |
}; |
2202 |
|
|
|
2203 |
|
|
$Element->{$HTML_NS}->{source} = { |
2204 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
2205 |
|
|
check_attrs => sub { |
2206 |
|
|
my ($self, $item, $element_state) = @_; |
2207 |
wakaba |
1.1 |
$GetHTMLAttrsChecker->({ |
2208 |
|
|
src => $HTMLURIAttrChecker, |
2209 |
|
|
type => $HTMLIMTAttrChecker, |
2210 |
|
|
media => $HTMLMQAttrChecker, |
2211 |
wakaba |
1.40 |
})->($self, $item, $element_state); |
2212 |
|
|
unless ($item->{node}->has_attribute_ns (undef, 'src')) { |
2213 |
|
|
$self->{onerror}->(node => $item->{node}, |
2214 |
wakaba |
1.1 |
type => 'attribute missing:src'); |
2215 |
|
|
} |
2216 |
|
|
}, |
2217 |
|
|
}; |
2218 |
|
|
|
2219 |
|
|
$Element->{$HTML_NS}->{canvas} = { |
2220 |
wakaba |
1.40 |
%HTMLTransparentChecker, |
2221 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2222 |
wakaba |
1.1 |
height => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), |
2223 |
|
|
width => $GetHTMLNonNegativeIntegerAttrChecker->(sub { 1 }), |
2224 |
|
|
}), |
2225 |
|
|
}; |
2226 |
|
|
|
2227 |
|
|
$Element->{$HTML_NS}->{map} = { |
2228 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
2229 |
|
|
check_attrs => sub { |
2230 |
|
|
my ($self, $item, $element_state) = @_; |
2231 |
wakaba |
1.4 |
my $has_id; |
2232 |
|
|
$GetHTMLAttrsChecker->({ |
2233 |
|
|
id => sub { |
2234 |
|
|
## NOTE: same as global |id=""|, with |$self->{map}| registeration |
2235 |
|
|
my ($self, $attr) = @_; |
2236 |
|
|
my $value = $attr->value; |
2237 |
|
|
if (length $value > 0) { |
2238 |
|
|
if ($self->{id}->{$value}) { |
2239 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate ID'); |
2240 |
|
|
push @{$self->{id}->{$value}}, $attr; |
2241 |
|
|
} else { |
2242 |
|
|
$self->{id}->{$value} = [$attr]; |
2243 |
|
|
} |
2244 |
wakaba |
1.1 |
} else { |
2245 |
wakaba |
1.4 |
## NOTE: MUST contain at least one character |
2246 |
|
|
$self->{onerror}->(node => $attr, type => 'empty attribute value'); |
2247 |
wakaba |
1.1 |
} |
2248 |
wakaba |
1.4 |
if ($value =~ /[\x09-\x0D\x20]/) { |
2249 |
|
|
$self->{onerror}->(node => $attr, type => 'space in ID'); |
2250 |
|
|
} |
2251 |
|
|
$self->{map}->{$value} ||= $attr; |
2252 |
|
|
$has_id = 1; |
2253 |
|
|
}, |
2254 |
wakaba |
1.40 |
})->($self, $item, $element_state); |
2255 |
|
|
$self->{onerror}->(node => $item->{node}, type => 'attribute missing:id') |
2256 |
wakaba |
1.4 |
unless $has_id; |
2257 |
|
|
}, |
2258 |
wakaba |
1.1 |
}; |
2259 |
|
|
|
2260 |
|
|
$Element->{$HTML_NS}->{area} = { |
2261 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
2262 |
|
|
check_attrs => sub { |
2263 |
|
|
my ($self, $item, $element_state) = @_; |
2264 |
wakaba |
1.1 |
my %attr; |
2265 |
|
|
my $coords; |
2266 |
wakaba |
1.40 |
for my $attr (@{$item->{node}->attributes}) { |
2267 |
wakaba |
1.1 |
my $attr_ns = $attr->namespace_uri; |
2268 |
|
|
$attr_ns = '' unless defined $attr_ns; |
2269 |
|
|
my $attr_ln = $attr->manakai_local_name; |
2270 |
|
|
my $checker; |
2271 |
|
|
if ($attr_ns eq '') { |
2272 |
|
|
$checker = { |
2273 |
|
|
alt => sub { }, |
2274 |
|
|
## NOTE: |alt| value has no conformance creteria. |
2275 |
|
|
shape => $GetHTMLEnumeratedAttrChecker->({ |
2276 |
|
|
circ => -1, circle => 1, |
2277 |
|
|
default => 1, |
2278 |
|
|
poly => 1, polygon => -1, |
2279 |
|
|
rect => 1, rectangle => -1, |
2280 |
|
|
}), |
2281 |
|
|
coords => sub { |
2282 |
|
|
my ($self, $attr) = @_; |
2283 |
|
|
my $value = $attr->value; |
2284 |
|
|
if ($value =~ /\A-?[0-9]+(?>,-?[0-9]+)*\z/) { |
2285 |
|
|
$coords = [split /,/, $value]; |
2286 |
|
|
} else { |
2287 |
|
|
$self->{onerror}->(node => $attr, |
2288 |
|
|
type => 'coords:syntax error'); |
2289 |
|
|
} |
2290 |
|
|
}, |
2291 |
|
|
target => $HTMLTargetAttrChecker, |
2292 |
|
|
href => $HTMLURIAttrChecker, |
2293 |
|
|
ping => $HTMLSpaceURIsAttrChecker, |
2294 |
wakaba |
1.40 |
rel => sub { $HTMLLinkTypesAttrChecker->(1, $item, @_) }, |
2295 |
wakaba |
1.1 |
media => $HTMLMQAttrChecker, |
2296 |
|
|
hreflang => $HTMLLanguageTagAttrChecker, |
2297 |
|
|
type => $HTMLIMTAttrChecker, |
2298 |
|
|
}->{$attr_ln}; |
2299 |
|
|
if ($checker) { |
2300 |
|
|
$attr{$attr_ln} = $attr; |
2301 |
|
|
} else { |
2302 |
|
|
$checker = $HTMLAttrChecker->{$attr_ln}; |
2303 |
|
|
} |
2304 |
|
|
} |
2305 |
|
|
$checker ||= $AttrChecker->{$attr_ns}->{$attr_ln} |
2306 |
|
|
|| $AttrChecker->{$attr_ns}->{''}; |
2307 |
|
|
if ($checker) { |
2308 |
|
|
$checker->($self, $attr) if ref $checker; |
2309 |
|
|
} else { |
2310 |
|
|
$self->{onerror}->(node => $attr, level => 'unsupported', |
2311 |
|
|
type => 'attribute'); |
2312 |
|
|
## ISSUE: No comformance createria for unknown attributes in the spec |
2313 |
|
|
} |
2314 |
|
|
} |
2315 |
|
|
|
2316 |
|
|
if (defined $attr{href}) { |
2317 |
wakaba |
1.4 |
$self->{has_hyperlink_element} = 1; |
2318 |
wakaba |
1.1 |
unless (defined $attr{alt}) { |
2319 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
2320 |
wakaba |
1.1 |
type => 'attribute missing:alt'); |
2321 |
|
|
} |
2322 |
|
|
} else { |
2323 |
|
|
for (qw/target ping rel media hreflang type alt/) { |
2324 |
|
|
if (defined $attr{$_}) { |
2325 |
|
|
$self->{onerror}->(node => $attr{$_}, |
2326 |
|
|
type => 'attribute not allowed'); |
2327 |
|
|
} |
2328 |
|
|
} |
2329 |
|
|
} |
2330 |
|
|
|
2331 |
|
|
my $shape = 'rectangle'; |
2332 |
|
|
if (defined $attr{shape}) { |
2333 |
|
|
$shape = { |
2334 |
|
|
circ => 'circle', circle => 'circle', |
2335 |
|
|
default => 'default', |
2336 |
|
|
poly => 'polygon', polygon => 'polygon', |
2337 |
|
|
rect => 'rectangle', rectangle => 'rectangle', |
2338 |
|
|
}->{lc $attr{shape}->value} || 'rectangle'; |
2339 |
|
|
## TODO: ASCII lowercase? |
2340 |
|
|
} |
2341 |
|
|
|
2342 |
|
|
if ($shape eq 'circle') { |
2343 |
|
|
if (defined $attr{coords}) { |
2344 |
|
|
if (defined $coords) { |
2345 |
|
|
if (@$coords == 3) { |
2346 |
|
|
if ($coords->[2] < 0) { |
2347 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2348 |
|
|
type => 'coords:out of range:2'); |
2349 |
|
|
} |
2350 |
|
|
} else { |
2351 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2352 |
|
|
type => 'coords:number:3:'.@$coords); |
2353 |
|
|
} |
2354 |
|
|
} else { |
2355 |
|
|
## NOTE: A syntax error has been reported. |
2356 |
|
|
} |
2357 |
|
|
} else { |
2358 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
2359 |
wakaba |
1.1 |
type => 'attribute missing:coords'); |
2360 |
|
|
} |
2361 |
|
|
} elsif ($shape eq 'default') { |
2362 |
|
|
if (defined $attr{coords}) { |
2363 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2364 |
|
|
type => 'attribute not allowed'); |
2365 |
|
|
} |
2366 |
|
|
} elsif ($shape eq 'polygon') { |
2367 |
|
|
if (defined $attr{coords}) { |
2368 |
|
|
if (defined $coords) { |
2369 |
|
|
if (@$coords >= 6) { |
2370 |
|
|
unless (@$coords % 2 == 0) { |
2371 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2372 |
|
|
type => 'coords:number:even:'.@$coords); |
2373 |
|
|
} |
2374 |
|
|
} else { |
2375 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2376 |
|
|
type => 'coords:number:>=6:'.@$coords); |
2377 |
|
|
} |
2378 |
|
|
} else { |
2379 |
|
|
## NOTE: A syntax error has been reported. |
2380 |
|
|
} |
2381 |
|
|
} else { |
2382 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
2383 |
wakaba |
1.1 |
type => 'attribute missing:coords'); |
2384 |
|
|
} |
2385 |
|
|
} elsif ($shape eq 'rectangle') { |
2386 |
|
|
if (defined $attr{coords}) { |
2387 |
|
|
if (defined $coords) { |
2388 |
|
|
if (@$coords == 4) { |
2389 |
|
|
unless ($coords->[0] < $coords->[2]) { |
2390 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2391 |
|
|
type => 'coords:out of range:0'); |
2392 |
|
|
} |
2393 |
|
|
unless ($coords->[1] < $coords->[3]) { |
2394 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2395 |
|
|
type => 'coords:out of range:1'); |
2396 |
|
|
} |
2397 |
|
|
} else { |
2398 |
|
|
$self->{onerror}->(node => $attr{coords}, |
2399 |
|
|
type => 'coords:number:4:'.@$coords); |
2400 |
|
|
} |
2401 |
|
|
} else { |
2402 |
|
|
## NOTE: A syntax error has been reported. |
2403 |
|
|
} |
2404 |
|
|
} else { |
2405 |
wakaba |
1.40 |
$self->{onerror}->(node => $item->{node}, |
2406 |
wakaba |
1.1 |
type => 'attribute missing:coords'); |
2407 |
|
|
} |
2408 |
|
|
} |
2409 |
|
|
}, |
2410 |
|
|
}; |
2411 |
|
|
## TODO: only in map |
2412 |
|
|
|
2413 |
|
|
$Element->{$HTML_NS}->{table} = { |
2414 |
wakaba |
1.40 |
%HTMLChecker, |
2415 |
|
|
check_start => sub { |
2416 |
|
|
my ($self, $item, $element_state) = @_; |
2417 |
|
|
$element_state->{phase} = 'before caption'; |
2418 |
|
|
}, |
2419 |
|
|
check_child_element => sub { |
2420 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
2421 |
|
|
$child_is_transparent, $element_state) = @_; |
2422 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
2423 |
|
|
$self->{onerror}->(node => $child_el, |
2424 |
|
|
type => 'element not allowed:minus', |
2425 |
|
|
level => $self->{must_level}); |
2426 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
2427 |
|
|
# |
2428 |
|
|
} elsif ($element_state->{phase} eq 'in tbodys') { |
2429 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') { |
2430 |
|
|
#$element_state->{phase} = 'in tbodys'; |
2431 |
|
|
} elsif (not $element_state->{has_tfoot} and |
2432 |
|
|
$child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') { |
2433 |
|
|
$element_state->{phase} = 'after tfoot'; |
2434 |
|
|
$element_state->{has_tfoot} = 1; |
2435 |
|
|
} else { |
2436 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2437 |
|
|
} |
2438 |
|
|
} elsif ($element_state->{phase} eq 'in trs') { |
2439 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') { |
2440 |
|
|
#$element_state->{phase} = 'in trs'; |
2441 |
|
|
} elsif (not $element_state->{has_tfoot} and |
2442 |
|
|
$child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') { |
2443 |
|
|
$element_state->{phase} = 'after tfoot'; |
2444 |
|
|
$element_state->{has_tfoot} = 1; |
2445 |
|
|
} else { |
2446 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2447 |
|
|
} |
2448 |
|
|
} elsif ($element_state->{phase} eq 'after thead') { |
2449 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') { |
2450 |
|
|
$element_state->{phase} = 'in tbodys'; |
2451 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') { |
2452 |
|
|
$element_state->{phase} = 'in trs'; |
2453 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') { |
2454 |
|
|
$element_state->{phase} = 'in tbodys'; |
2455 |
|
|
$element_state->{has_tfoot} = 1; |
2456 |
|
|
} else { |
2457 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2458 |
|
|
} |
2459 |
|
|
} elsif ($element_state->{phase} eq 'in colgroup') { |
2460 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') { |
2461 |
|
|
$element_state->{phase} = 'in colgroup'; |
2462 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') { |
2463 |
|
|
$element_state->{phase} = 'after thead'; |
2464 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') { |
2465 |
|
|
$element_state->{phase} = 'in tbodys'; |
2466 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') { |
2467 |
|
|
$element_state->{phase} = 'in trs'; |
2468 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') { |
2469 |
|
|
$element_state->{phase} = 'in tbodys'; |
2470 |
|
|
$element_state->{has_tfoot} = 1; |
2471 |
|
|
} else { |
2472 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2473 |
|
|
} |
2474 |
|
|
} elsif ($element_state->{phase} eq 'before caption') { |
2475 |
|
|
if ($child_nsuri eq $HTML_NS and $child_ln eq 'caption') { |
2476 |
|
|
$element_state->{phase} = 'in colgroup'; |
2477 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'colgroup') { |
2478 |
|
|
$element_state->{phase} = 'in colgroup'; |
2479 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'thead') { |
2480 |
|
|
$element_state->{phase} = 'after thead'; |
2481 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tbody') { |
2482 |
|
|
$element_state->{phase} = 'in tbodys'; |
2483 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') { |
2484 |
|
|
$element_state->{phase} = 'in trs'; |
2485 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tfoot') { |
2486 |
|
|
$element_state->{phase} = 'in tbodys'; |
2487 |
|
|
$element_state->{has_tfoot} = 1; |
2488 |
|
|
} else { |
2489 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2490 |
|
|
} |
2491 |
|
|
} elsif ($element_state->{phase} eq 'after tfoot') { |
2492 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2493 |
|
|
} else { |
2494 |
|
|
die "check_child_element: Bad |table| phase: $element_state->{phase}"; |
2495 |
|
|
} |
2496 |
|
|
}, |
2497 |
|
|
check_child_text => sub { |
2498 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
2499 |
|
|
if ($has_significant) { |
2500 |
|
|
$self->{onerror}->(node => $child_node, type => 'character not allowed'); |
2501 |
wakaba |
1.1 |
} |
2502 |
wakaba |
1.40 |
}, |
2503 |
|
|
check_end => sub { |
2504 |
|
|
my ($self, $item, $element_state) = @_; |
2505 |
wakaba |
1.1 |
|
2506 |
|
|
## Table model errors |
2507 |
|
|
require Whatpm::HTMLTable; |
2508 |
wakaba |
1.40 |
Whatpm::HTMLTable->form_table ($item->{node}, sub { |
2509 |
wakaba |
1.1 |
my %opt = @_; |
2510 |
|
|
$self->{onerror}->(type => 'table:'.$opt{type}, node => $opt{node}); |
2511 |
|
|
}); |
2512 |
wakaba |
1.40 |
push @{$self->{return}->{table}}, $item->{node}; |
2513 |
wakaba |
1.1 |
|
2514 |
wakaba |
1.40 |
$HTMLChecker{check_end}->(@_); |
2515 |
wakaba |
1.1 |
}, |
2516 |
|
|
}; |
2517 |
|
|
|
2518 |
|
|
$Element->{$HTML_NS}->{caption} = { |
2519 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
2520 |
wakaba |
1.1 |
}; |
2521 |
|
|
|
2522 |
|
|
$Element->{$HTML_NS}->{colgroup} = { |
2523 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
2524 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2525 |
wakaba |
1.1 |
span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
2526 |
|
|
## NOTE: Defined only if "the |colgroup| element contains no |col| elements" |
2527 |
|
|
## TODO: "attribute not supported" if |col|. |
2528 |
|
|
## ISSUE: MUST NOT if any |col|? |
2529 |
|
|
## ISSUE: MUST NOT for |<colgroup span="1"><any><col/></any></colgroup>| (though non-conforming)? |
2530 |
|
|
}), |
2531 |
wakaba |
1.40 |
check_child_element => sub { |
2532 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
2533 |
|
|
$child_is_transparent, $element_state) = @_; |
2534 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
2535 |
|
|
$self->{onerror}->(node => $child_el, |
2536 |
|
|
type => 'element not allowed:minus', |
2537 |
|
|
level => $self->{must_level}); |
2538 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
2539 |
|
|
# |
2540 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'col') { |
2541 |
|
|
# |
2542 |
|
|
} else { |
2543 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2544 |
|
|
} |
2545 |
|
|
}, |
2546 |
|
|
check_child_text => sub { |
2547 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
2548 |
|
|
if ($has_significant) { |
2549 |
|
|
$self->{onerror}->(node => $child_node, type => 'character not allowed'); |
2550 |
wakaba |
1.1 |
} |
2551 |
|
|
}, |
2552 |
|
|
}; |
2553 |
|
|
|
2554 |
|
|
$Element->{$HTML_NS}->{col} = { |
2555 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
2556 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2557 |
wakaba |
1.1 |
span => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
2558 |
|
|
}), |
2559 |
|
|
}; |
2560 |
|
|
|
2561 |
|
|
$Element->{$HTML_NS}->{tbody} = { |
2562 |
wakaba |
1.40 |
%HTMLChecker, |
2563 |
|
|
check_child_element => sub { |
2564 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
2565 |
|
|
$child_is_transparent, $element_state) = @_; |
2566 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
2567 |
|
|
$self->{onerror}->(node => $child_el, |
2568 |
|
|
type => 'element not allowed:minus', |
2569 |
|
|
level => $self->{must_level}); |
2570 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
2571 |
|
|
# |
2572 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'tr') { |
2573 |
|
|
$element_state->{has_tr} = 1; |
2574 |
|
|
} else { |
2575 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2576 |
|
|
} |
2577 |
|
|
}, |
2578 |
|
|
check_child_text => sub { |
2579 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
2580 |
|
|
if ($has_significant) { |
2581 |
|
|
$self->{onerror}->(node => $child_node, type => 'character not allowed'); |
2582 |
wakaba |
1.1 |
} |
2583 |
wakaba |
1.40 |
}, |
2584 |
|
|
check_end => sub { |
2585 |
|
|
my ($self, $item, $element_state) = @_; |
2586 |
|
|
unless ($element_state->{has_tr}) { |
2587 |
|
|
$self->{onerror}->(node => $item->{node}, |
2588 |
|
|
type => 'child element missing:tr'); |
2589 |
wakaba |
1.1 |
} |
2590 |
wakaba |
1.40 |
|
2591 |
|
|
$HTMLChecker{check_end}->(@_); |
2592 |
wakaba |
1.1 |
}, |
2593 |
|
|
}; |
2594 |
|
|
|
2595 |
|
|
$Element->{$HTML_NS}->{thead} = { |
2596 |
wakaba |
1.40 |
%{$Element->{$HTML_NS}->{tbody}}, |
2597 |
wakaba |
1.1 |
}; |
2598 |
|
|
|
2599 |
|
|
$Element->{$HTML_NS}->{tfoot} = { |
2600 |
wakaba |
1.40 |
%{$Element->{$HTML_NS}->{tbody}}, |
2601 |
wakaba |
1.1 |
}; |
2602 |
|
|
|
2603 |
|
|
$Element->{$HTML_NS}->{tr} = { |
2604 |
wakaba |
1.40 |
%HTMLChecker, |
2605 |
|
|
check_child_element => sub { |
2606 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
2607 |
|
|
$child_is_transparent, $element_state) = @_; |
2608 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
2609 |
|
|
$self->{onerror}->(node => $child_el, |
2610 |
|
|
type => 'element not allowed:minus', |
2611 |
|
|
level => $self->{must_level}); |
2612 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
2613 |
|
|
# |
2614 |
|
|
} elsif ($child_nsuri eq $HTML_NS and |
2615 |
|
|
($child_ln eq 'td' or $child_ln eq 'th')) { |
2616 |
|
|
$element_state->{has_cell} = 1; |
2617 |
|
|
} else { |
2618 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2619 |
|
|
} |
2620 |
|
|
}, |
2621 |
|
|
check_child_text => sub { |
2622 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
2623 |
|
|
if ($has_significant) { |
2624 |
|
|
$self->{onerror}->(node => $child_node, type => 'character not allowed'); |
2625 |
wakaba |
1.1 |
} |
2626 |
wakaba |
1.40 |
}, |
2627 |
|
|
check_end => sub { |
2628 |
|
|
my ($self, $item, $element_state) = @_; |
2629 |
|
|
unless ($element_state->{has_cell}) { |
2630 |
|
|
$self->{onerror}->(node => $item->{node}, |
2631 |
|
|
type => 'child element missing:td|th'); |
2632 |
wakaba |
1.1 |
} |
2633 |
wakaba |
1.40 |
|
2634 |
|
|
$HTMLChecker{check_end}->(@_); |
2635 |
wakaba |
1.1 |
}, |
2636 |
|
|
}; |
2637 |
|
|
|
2638 |
|
|
$Element->{$HTML_NS}->{td} = { |
2639 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
2640 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2641 |
wakaba |
1.1 |
colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
2642 |
|
|
rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
2643 |
|
|
}), |
2644 |
|
|
}; |
2645 |
|
|
|
2646 |
|
|
$Element->{$HTML_NS}->{th} = { |
2647 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
2648 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2649 |
wakaba |
1.1 |
colspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
2650 |
|
|
rowspan => $GetHTMLNonNegativeIntegerAttrChecker->(sub { shift > 0 }), |
2651 |
|
|
scope => $GetHTMLEnumeratedAttrChecker |
2652 |
|
|
->({row => 1, col => 1, rowgroup => 1, colgroup => 1}), |
2653 |
|
|
}), |
2654 |
|
|
}; |
2655 |
|
|
|
2656 |
|
|
## TODO: forms |
2657 |
wakaba |
1.8 |
## TODO: Tests for <nest/> in form elements |
2658 |
wakaba |
1.1 |
|
2659 |
|
|
$Element->{$HTML_NS}->{script} = { |
2660 |
wakaba |
1.40 |
%HTMLChecker, |
2661 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2662 |
wakaba |
1.1 |
src => $HTMLURIAttrChecker, |
2663 |
|
|
defer => $GetHTMLBooleanAttrChecker->('defer'), |
2664 |
|
|
async => $GetHTMLBooleanAttrChecker->('async'), |
2665 |
|
|
type => $HTMLIMTAttrChecker, |
2666 |
wakaba |
1.9 |
}), |
2667 |
wakaba |
1.40 |
check_start => sub { |
2668 |
|
|
my ($self, $item, $element_state) = @_; |
2669 |
wakaba |
1.1 |
|
2670 |
wakaba |
1.40 |
if ($item->{node}->has_attribute_ns (undef, 'src')) { |
2671 |
|
|
$element_state->{must_be_empty} = 1; |
2672 |
wakaba |
1.1 |
} else { |
2673 |
|
|
## NOTE: No content model conformance in HTML5 spec. |
2674 |
wakaba |
1.40 |
my $type = $item->{node}->get_attribute_ns (undef, 'type'); |
2675 |
|
|
my $language = $item->{node}->get_attribute_ns (undef, 'language'); |
2676 |
wakaba |
1.1 |
if ((defined $type and $type eq '') or |
2677 |
|
|
(defined $language and $language eq '')) { |
2678 |
|
|
$type = 'text/javascript'; |
2679 |
|
|
} elsif (defined $type) { |
2680 |
|
|
# |
2681 |
|
|
} elsif (defined $language) { |
2682 |
|
|
$type = 'text/' . $language; |
2683 |
|
|
} else { |
2684 |
|
|
$type = 'text/javascript'; |
2685 |
|
|
} |
2686 |
wakaba |
1.40 |
$element_state->{script_type} = $type; ## TODO: $type normalization |
2687 |
|
|
} |
2688 |
|
|
}, |
2689 |
|
|
check_child_element => sub { |
2690 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
2691 |
|
|
$child_is_transparent, $element_state) = @_; |
2692 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
2693 |
|
|
$self->{onerror}->(node => $child_el, |
2694 |
|
|
type => 'element not allowed:minus', |
2695 |
|
|
level => $self->{must_level}); |
2696 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
2697 |
|
|
# |
2698 |
|
|
} else { |
2699 |
|
|
if ($element_state->{must_be_empty}) { |
2700 |
|
|
$self->{onerror}->(node => $child_el, |
2701 |
|
|
type => 'element not allowed'); |
2702 |
|
|
} |
2703 |
|
|
} |
2704 |
|
|
}, |
2705 |
|
|
check_child_text => sub { |
2706 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
2707 |
|
|
if ($has_significant and |
2708 |
|
|
$element_state->{must_be_empty}) { |
2709 |
|
|
$self->{onerror}->(node => $child_node, |
2710 |
|
|
type => 'character not allowed'); |
2711 |
|
|
} |
2712 |
|
|
}, |
2713 |
|
|
check_end => sub { |
2714 |
|
|
my ($self, $item, $element_state) = @_; |
2715 |
|
|
unless ($element_state->{must_be_empty}) { |
2716 |
|
|
$self->{onerror}->(node => $item->{node}, level => 'unsupported', |
2717 |
|
|
type => 'script:'.$element_state->{script_type}); |
2718 |
|
|
## TODO: text/javascript support |
2719 |
|
|
|
2720 |
|
|
$HTMLChecker{check_end}->(@_); |
2721 |
wakaba |
1.1 |
} |
2722 |
|
|
}, |
2723 |
|
|
}; |
2724 |
wakaba |
1.25 |
## ISSUE: Significant check and text child node |
2725 |
wakaba |
1.1 |
|
2726 |
|
|
## NOTE: When script is disabled. |
2727 |
|
|
$Element->{$HTML_NS}->{noscript} = { |
2728 |
wakaba |
1.40 |
%HTMLTransparentChecker, |
2729 |
|
|
check_start => sub { |
2730 |
|
|
my ($self, $item, $element_state) = @_; |
2731 |
wakaba |
1.3 |
|
2732 |
wakaba |
1.40 |
unless ($item->{node}->owner_document->manakai_is_html) { |
2733 |
|
|
$self->{onerror}->(node => $item->{node}, type => 'in XML:noscript'); |
2734 |
wakaba |
1.3 |
} |
2735 |
|
|
|
2736 |
wakaba |
1.40 |
unless ($self->{flag}->{in_head}) { |
2737 |
|
|
$self->_add_minus_elements ($element_state, |
2738 |
|
|
{$HTML_NS => {noscript => 1}}); |
2739 |
|
|
} |
2740 |
wakaba |
1.3 |
}, |
2741 |
wakaba |
1.40 |
check_child_element => sub { |
2742 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
2743 |
|
|
$child_is_transparent, $element_state) = @_; |
2744 |
|
|
if ($self->{flag}->{in_head}) { |
2745 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
2746 |
|
|
$self->{onerror}->(node => $child_el, |
2747 |
|
|
type => 'element not allowed:minus', |
2748 |
|
|
level => $self->{must_level}); |
2749 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
2750 |
|
|
# |
2751 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'link') { |
2752 |
|
|
# |
2753 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'style') { |
2754 |
|
|
if ($child_el->has_attribute_ns (undef, 'scoped')) { |
2755 |
|
|
$self->{onerror}->(node => $child_el, |
2756 |
|
|
type => 'element not allowed:head noscript', |
2757 |
|
|
level => $self->{must_level}); |
2758 |
|
|
} |
2759 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'meta') { |
2760 |
|
|
if ($child_el->has_attribute_ns (undef, 'charset')) { |
2761 |
|
|
## NOTE: Non-conforming. An error is raised by |
2762 |
|
|
## |meta|'s checker. |
2763 |
|
|
} else { |
2764 |
|
|
my $http_equiv_attr |
2765 |
|
|
= $child_el->get_attribute_node_ns (undef, 'http-equiv'); |
2766 |
|
|
if ($http_equiv_attr) { |
2767 |
|
|
## TODO: case |
2768 |
|
|
if (lc $http_equiv_attr->value eq 'content-type') { |
2769 |
|
|
## NOTE: Non-conforming. An error is raised by |
2770 |
|
|
## |meta|'s checker. |
2771 |
|
|
} else { |
2772 |
wakaba |
1.3 |
# |
2773 |
|
|
} |
2774 |
|
|
} else { |
2775 |
wakaba |
1.40 |
$self->{onerror}->(node => $child_el, |
2776 |
wakaba |
1.34 |
type => 'element not allowed:head noscript', |
2777 |
|
|
level => $self->{must_level}); |
2778 |
wakaba |
1.3 |
} |
2779 |
|
|
} |
2780 |
wakaba |
1.40 |
} else { |
2781 |
|
|
$self->{onerror}->(node => $child_el, |
2782 |
|
|
type => 'element not allowed:head noscript', |
2783 |
|
|
level => $self->{must_level}); |
2784 |
|
|
} |
2785 |
|
|
} else { |
2786 |
|
|
$HTMLTransparentChecker{check_child_element}->(@_); |
2787 |
|
|
} |
2788 |
|
|
}, |
2789 |
|
|
check_child_text => sub { |
2790 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
2791 |
|
|
if ($self->{flag}->{in_head}) { |
2792 |
|
|
if ($has_significant) { |
2793 |
|
|
$self->{onerror}->(node => $child_node, |
2794 |
|
|
type => 'character not allowed'); |
2795 |
wakaba |
1.3 |
} |
2796 |
|
|
} else { |
2797 |
wakaba |
1.40 |
$HTMLTransparentChecker{check_child_text}->(@_); |
2798 |
|
|
} |
2799 |
|
|
}, |
2800 |
|
|
check_end => sub { |
2801 |
|
|
my ($self, $item, $element_state) = @_; |
2802 |
|
|
$self->_remove_minus_elements ($element_state); |
2803 |
|
|
if ($self->{flag}->{in_head}) { |
2804 |
|
|
$HTMLChecker{check_end}->(@_); |
2805 |
|
|
} else { |
2806 |
|
|
$HTMLPhrasingContentChecker{check_end}->(@_); |
2807 |
wakaba |
1.3 |
} |
2808 |
wakaba |
1.1 |
}, |
2809 |
|
|
}; |
2810 |
wakaba |
1.3 |
## ISSUE: Scripting is disabled: <head><noscript><html a></noscript></head> |
2811 |
wakaba |
1.1 |
|
2812 |
|
|
$Element->{$HTML_NS}->{'event-source'} = { |
2813 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
2814 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2815 |
wakaba |
1.1 |
src => $HTMLURIAttrChecker, |
2816 |
|
|
}), |
2817 |
|
|
}; |
2818 |
|
|
|
2819 |
|
|
$Element->{$HTML_NS}->{details} = { |
2820 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
2821 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2822 |
wakaba |
1.1 |
open => $GetHTMLBooleanAttrChecker->('open'), |
2823 |
|
|
}), |
2824 |
|
|
checker => sub { |
2825 |
|
|
my ($self, $todo) = @_; |
2826 |
|
|
|
2827 |
wakaba |
1.29 |
## TODO: |
2828 |
wakaba |
1.40 |
# my ($sib, $ch) |
2829 |
|
|
# = $GetHTMLZeroOrMoreThenBlockOrInlineChecker->($HTML_NS, 'legend') |
2830 |
|
|
# ->($self, $todo); |
2831 |
|
|
# return ($sib, $ch); |
2832 |
wakaba |
1.1 |
}, |
2833 |
|
|
}; |
2834 |
|
|
|
2835 |
|
|
$Element->{$HTML_NS}->{datagrid} = { |
2836 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
2837 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2838 |
wakaba |
1.1 |
disabled => $GetHTMLBooleanAttrChecker->('disabled'), |
2839 |
|
|
multiple => $GetHTMLBooleanAttrChecker->('multiple'), |
2840 |
|
|
}), |
2841 |
wakaba |
1.40 |
check_start => sub { |
2842 |
|
|
my ($self, $item, $element_state) = @_; |
2843 |
wakaba |
1.1 |
|
2844 |
wakaba |
1.40 |
$self->_add_minus_elements ($element_state, |
2845 |
|
|
{$HTML_NS => {a => 1, datagrid => 1}}); |
2846 |
|
|
$element_state->{phase} = 'any'; |
2847 |
|
|
}, |
2848 |
|
|
## Prose -(text* table Prose*) | table | select | datalist | Empty |
2849 |
|
|
check_child_element => sub { |
2850 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
2851 |
|
|
$child_is_transparent, $element_state) = @_; |
2852 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
2853 |
|
|
$self->{onerror}->(node => $child_el, |
2854 |
|
|
type => 'element not allowed:minus', |
2855 |
|
|
level => $self->{must_level}); |
2856 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
2857 |
|
|
# |
2858 |
|
|
} elsif ($element_state->{phase} eq 'prose') { |
2859 |
|
|
if ($HTMLProseContent->{$child_nsuri}->{$child_ln}) { |
2860 |
|
|
if ($element_state->{has_element} and |
2861 |
|
|
$child_nsuri eq $HTML_NS and |
2862 |
|
|
$child_ln eq 'table') { |
2863 |
|
|
$self->{onerror}->(node => $child_el, |
2864 |
|
|
type => 'element not allowed'); |
2865 |
|
|
} else { |
2866 |
wakaba |
1.8 |
# |
2867 |
wakaba |
1.1 |
} |
2868 |
wakaba |
1.40 |
} else { |
2869 |
|
|
$self->{onerror}->(node => $child_el, |
2870 |
|
|
type => 'element not allowed'); |
2871 |
|
|
} |
2872 |
|
|
} elsif ($element_state->{phase} eq 'any') { |
2873 |
|
|
if ($child_nsuri eq $HTML_NS and |
2874 |
|
|
{table => 1, select => 1, datalist => 1}->{$child_ln}) { |
2875 |
|
|
$element_state->{phase} = 'none'; |
2876 |
|
|
} elsif ($HTMLProseContent->{$child_nsuri}->{$child_ln}) { |
2877 |
|
|
$element_state->{has_element} = 1; |
2878 |
|
|
$element_state->{phase} = 'prose'; |
2879 |
|
|
} else { |
2880 |
|
|
$self->{onerror}->(node => $child_el, |
2881 |
|
|
type => 'element not allowed'); |
2882 |
|
|
} |
2883 |
|
|
} elsif ($element_state->{phase} eq 'none') { |
2884 |
|
|
$self->{onerror}->(node => $child_el, |
2885 |
|
|
type => 'element not allowed'); |
2886 |
|
|
} else { |
2887 |
|
|
die "check_child_element: Bad |datagrid| phase: $element_state->{phase}"; |
2888 |
|
|
} |
2889 |
|
|
}, |
2890 |
|
|
check_child_text => sub { |
2891 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
2892 |
|
|
if ($has_significant) { |
2893 |
|
|
if ($element_state->{phase} eq 'prose') { |
2894 |
|
|
# |
2895 |
|
|
} elsif ($element_state->{phase} eq 'any') { |
2896 |
|
|
$element_state->{phase} = 'prose'; |
2897 |
|
|
} else { |
2898 |
|
|
$self->{onerror}->(node => $child_node, |
2899 |
|
|
type => 'character not allowed'); |
2900 |
wakaba |
1.1 |
} |
2901 |
|
|
} |
2902 |
wakaba |
1.40 |
}, |
2903 |
|
|
check_end => sub { |
2904 |
|
|
my ($self, $item, $element_state) = @_; |
2905 |
|
|
$self->_remove_minus_elements ($element_state); |
2906 |
wakaba |
1.1 |
|
2907 |
wakaba |
1.40 |
if ($element_state->{phase} eq 'none') { |
2908 |
|
|
$HTMLChecker{check_end}->(@_); |
2909 |
|
|
} else { |
2910 |
|
|
$HTMLPhrasingContentChecker{check_end}->(@_); |
2911 |
|
|
} |
2912 |
|
|
}, |
2913 |
wakaba |
1.29 |
## ISSUE: "xxx<table/>" is disallowed; "<select/>aaa" and "<datalist/>aa" |
2914 |
|
|
## are not disallowed (assuming that form control contents are also |
2915 |
|
|
## prose content). |
2916 |
wakaba |
1.1 |
}; |
2917 |
|
|
|
2918 |
|
|
$Element->{$HTML_NS}->{command} = { |
2919 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
2920 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2921 |
wakaba |
1.1 |
checked => $GetHTMLBooleanAttrChecker->('checked'), |
2922 |
|
|
default => $GetHTMLBooleanAttrChecker->('default'), |
2923 |
|
|
disabled => $GetHTMLBooleanAttrChecker->('disabled'), |
2924 |
|
|
hidden => $GetHTMLBooleanAttrChecker->('hidden'), |
2925 |
|
|
icon => $HTMLURIAttrChecker, |
2926 |
|
|
label => sub { }, ## NOTE: No conformance creteria |
2927 |
|
|
radiogroup => sub { }, ## NOTE: No conformance creteria |
2928 |
|
|
## NOTE: |title| has special semantics, but no syntactical difference |
2929 |
|
|
type => sub { |
2930 |
|
|
my ($self, $attr) = @_; |
2931 |
|
|
my $value = $attr->value; |
2932 |
|
|
unless ({command => 1, checkbox => 1, radio => 1}->{$value}) { |
2933 |
|
|
$self->{onerror}->(node => $attr, type => 'attribute value not allowed'); |
2934 |
|
|
} |
2935 |
|
|
}, |
2936 |
|
|
}), |
2937 |
|
|
}; |
2938 |
|
|
|
2939 |
|
|
$Element->{$HTML_NS}->{menu} = { |
2940 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
2941 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
2942 |
wakaba |
1.1 |
autosubmit => $GetHTMLBooleanAttrChecker->('autosubmit'), |
2943 |
|
|
id => sub { |
2944 |
|
|
## NOTE: same as global |id=""|, with |$self->{menu}| registeration |
2945 |
|
|
my ($self, $attr) = @_; |
2946 |
|
|
my $value = $attr->value; |
2947 |
|
|
if (length $value > 0) { |
2948 |
|
|
if ($self->{id}->{$value}) { |
2949 |
|
|
$self->{onerror}->(node => $attr, type => 'duplicate ID'); |
2950 |
|
|
push @{$self->{id}->{$value}}, $attr; |
2951 |
|
|
} else { |
2952 |
|
|
$self->{id}->{$value} = [$attr]; |
2953 |
|
|
} |
2954 |
|
|
} else { |
2955 |
|
|
## NOTE: MUST contain at least one character |
2956 |
|
|
$self->{onerror}->(node => $attr, type => 'empty attribute value'); |
2957 |
|
|
} |
2958 |
|
|
if ($value =~ /[\x09-\x0D\x20]/) { |
2959 |
|
|
$self->{onerror}->(node => $attr, type => 'space in ID'); |
2960 |
|
|
} |
2961 |
|
|
$self->{menu}->{$value} ||= $attr; |
2962 |
|
|
## ISSUE: <menu id=""><p contextmenu=""> match? |
2963 |
|
|
}, |
2964 |
|
|
label => sub { }, ## NOTE: No conformance creteria |
2965 |
|
|
type => $GetHTMLEnumeratedAttrChecker->({context => 1, toolbar => 1}), |
2966 |
|
|
}), |
2967 |
wakaba |
1.40 |
check_start => sub { |
2968 |
|
|
my ($self, $item, $element_state) = @_; |
2969 |
|
|
$element_state->{phase} = 'li or phrasing'; |
2970 |
|
|
$element_state->{in_menu_original} = $self->{flag}->{in_menu}; |
2971 |
|
|
$self->{flag}->{in_menu} = 1; |
2972 |
|
|
}, |
2973 |
|
|
check_child_element => sub { |
2974 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
2975 |
|
|
$child_is_transparent, $element_state) = @_; |
2976 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
2977 |
|
|
$self->{onerror}->(node => $child_el, |
2978 |
|
|
type => 'element not allowed:minus', |
2979 |
|
|
level => $self->{must_level}); |
2980 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
2981 |
|
|
# |
2982 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'li') { |
2983 |
|
|
if ($element_state->{phase} eq 'li') { |
2984 |
|
|
# |
2985 |
|
|
} elsif ($element_state->{phase} eq 'li or phrasing') { |
2986 |
|
|
$element_state->{phase} = 'li'; |
2987 |
|
|
} else { |
2988 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2989 |
|
|
} |
2990 |
|
|
} elsif ($HTMLPhrasingContent->{$child_nsuri}->{$child_ln}) { |
2991 |
|
|
if ($element_state->{phase} eq 'phrasing') { |
2992 |
|
|
# |
2993 |
|
|
} elsif ($element_state->{phase} eq 'li or phrasing') { |
2994 |
|
|
$element_state->{phase} = 'phrasing'; |
2995 |
|
|
} else { |
2996 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
2997 |
|
|
} |
2998 |
|
|
} else { |
2999 |
|
|
$self->{onerror}->(node => $child_el, type => 'element not allowed'); |
3000 |
|
|
} |
3001 |
|
|
}, |
3002 |
|
|
check_child_text => sub { |
3003 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
3004 |
|
|
if ($has_significant) { |
3005 |
|
|
if ($element_state->{phase} eq 'phrasing') { |
3006 |
|
|
# |
3007 |
|
|
} elsif ($element_state->{phase} eq 'li or phrasing') { |
3008 |
|
|
$element_state->{phase} = 'phrasing'; |
3009 |
|
|
} else { |
3010 |
|
|
$self->{onerror}->(node => $child_node, |
3011 |
|
|
type => 'character not allowed'); |
3012 |
wakaba |
1.1 |
} |
3013 |
|
|
} |
3014 |
wakaba |
1.40 |
}, |
3015 |
|
|
check_end => sub { |
3016 |
|
|
my ($self, $item, $element_state) = @_; |
3017 |
|
|
delete $self->{flag}->{in_menu} unless $element_state->{in_menu_original}; |
3018 |
|
|
|
3019 |
|
|
if ($element_state->{phase} eq 'li') { |
3020 |
|
|
$HTMLChecker{check_end}->(@_); |
3021 |
|
|
} else { # 'phrasing' or 'li or phrasing' |
3022 |
|
|
$HTMLPhrasingContentChecker{check_end}->(@_); |
3023 |
wakaba |
1.1 |
} |
3024 |
|
|
}, |
3025 |
wakaba |
1.8 |
}; |
3026 |
|
|
|
3027 |
|
|
$Element->{$HTML_NS}->{datatemplate} = { |
3028 |
wakaba |
1.40 |
%HTMLChecker, |
3029 |
|
|
check_child_element => sub { |
3030 |
|
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
3031 |
|
|
$child_is_transparent, $element_state) = @_; |
3032 |
|
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
3033 |
|
|
$self->{onerror}->(node => $child_el, |
3034 |
|
|
type => 'element not allowed:minus', |
3035 |
|
|
level => $self->{must_level}); |
3036 |
|
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
3037 |
|
|
# |
3038 |
|
|
} elsif ($child_nsuri eq $HTML_NS and $child_ln eq 'rule') { |
3039 |
|
|
# |
3040 |
|
|
} else { |
3041 |
|
|
$self->{onerror}->(node => $child_el, |
3042 |
|
|
type => 'element not allowed:datatemplate'); |
3043 |
|
|
} |
3044 |
|
|
}, |
3045 |
|
|
check_child_text => sub { |
3046 |
|
|
my ($self, $item, $child_node, $has_significant, $element_state) = @_; |
3047 |
|
|
if ($has_significant) { |
3048 |
|
|
$self->{onerror}->(node => $child_node, type => 'character not allowed'); |
3049 |
wakaba |
1.8 |
} |
3050 |
|
|
}, |
3051 |
|
|
is_xml_root => 1, |
3052 |
|
|
}; |
3053 |
|
|
|
3054 |
|
|
$Element->{$HTML_NS}->{rule} = { |
3055 |
wakaba |
1.40 |
%HTMLChecker, |
3056 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
3057 |
wakaba |
1.23 |
condition => $HTMLSelectorsAttrChecker, |
3058 |
wakaba |
1.18 |
mode => $HTMLUnorderedUniqueSetOfSpaceSeparatedTokensAttrChecker, |
3059 |
wakaba |
1.8 |
}), |
3060 |
wakaba |
1.40 |
check_start => sub { |
3061 |
|
|
my ($self, $item, $element_state) = @_; |
3062 |
|
|
$self->_add_plus_elements ($element_state, {$HTML_NS => {nest => 1}}); |
3063 |
|
|
}, |
3064 |
|
|
check_child_element => sub { }, |
3065 |
|
|
check_child_text => sub { }, |
3066 |
|
|
check_end => sub { |
3067 |
|
|
my ($self, $item, $element_state) = @_; |
3068 |
|
|
$self->_remove_plus_elements ($element_state); |
3069 |
|
|
$HTMLChecker{check_end}->(@_); |
3070 |
wakaba |
1.8 |
}, |
3071 |
|
|
## NOTE: "MAY be anything that, when the parent |datatemplate| |
3072 |
|
|
## is applied to some conforming data, results in a conforming DOM tree.": |
3073 |
|
|
## We don't check against this. |
3074 |
|
|
}; |
3075 |
|
|
|
3076 |
|
|
$Element->{$HTML_NS}->{nest} = { |
3077 |
wakaba |
1.40 |
%HTMLEmptyChecker, |
3078 |
|
|
check_attrs => $GetHTMLAttrsChecker->({ |
3079 |
wakaba |
1.23 |
filter => $HTMLSelectorsAttrChecker, |
3080 |
|
|
mode => sub { |
3081 |
|
|
my ($self, $attr) = @_; |
3082 |
|
|
my $value = $attr->value; |
3083 |
|
|
if ($value !~ /\A[^\x09-\x0D\x20]+\z/) { |
3084 |
|
|
$self->{onerror}->(node => $attr, type => 'mode:syntax error'); |
3085 |
|
|
} |
3086 |
|
|
}, |
3087 |
wakaba |
1.8 |
}), |
3088 |
wakaba |
1.1 |
}; |
3089 |
|
|
|
3090 |
|
|
$Element->{$HTML_NS}->{legend} = { |
3091 |
wakaba |
1.40 |
%HTMLPhrasingContentChecker, |
3092 |
wakaba |
1.1 |
}; |
3093 |
|
|
|
3094 |
|
|
$Element->{$HTML_NS}->{div} = { |
3095 |
wakaba |
1.40 |
%HTMLProseContentChecker, |
3096 |
wakaba |
1.1 |
}; |
3097 |
|
|
|
3098 |
|
|
$Element->{$HTML_NS}->{font} = { |
3099 |
wakaba |
1.40 |
%HTMLTransparentChecker, |
3100 |
|
|
check_attrs => $GetHTMLAttrsChecker->({}), ## TODO |
3101 |
wakaba |
1.1 |
}; |
3102 |
|
|
|
3103 |
|
|
$Whatpm::ContentChecker::Namespace->{$HTML_NS}->{loaded} = 1; |
3104 |
|
|
|
3105 |
|
|
1; |