59 |
# |
# |
60 |
} else { |
} else { |
61 |
## NOTE: An XML "error" |
## NOTE: An XML "error" |
62 |
$self->{onerror}->(node => $attr, level => 'error', |
$self->{onerror}->(node => $attr, level => $self->{level}->{xml_error}, |
63 |
type => 'invalid attribute value'); |
type => 'invalid attribute value'); |
64 |
} |
} |
65 |
}, |
}, |
71 |
} else { |
} else { |
72 |
require Whatpm::LangTag; |
require Whatpm::LangTag; |
73 |
Whatpm::LangTag->check_rfc3066_language_tag ($value, sub { |
Whatpm::LangTag->check_rfc3066_language_tag ($value, sub { |
74 |
my %opt = @_; |
$self->{onerror}->(@_, node => $attr); |
|
my $type = 'LangTag:'.$opt{type}; |
|
|
$type .= ':' . $opt{subtag} if defined $opt{subtag}; |
|
|
$self->{onerror}->(node => $attr, type => $type, |
|
|
value => $opt{value}, level => $opt{level}); |
|
75 |
}); |
}); |
76 |
} |
} |
77 |
|
|
85 |
## TODO: test data |
## TODO: test data |
86 |
|
|
87 |
if ($attr->owner_document->manakai_is_html) { # MUST NOT |
if ($attr->owner_document->manakai_is_html) { # MUST NOT |
88 |
$self->{onerror}->(node => $attr, type => 'in HTML:xml:lang'); |
$self->{onerror}->(node => $attr, type => 'in HTML:xml:lang', |
89 |
|
level => $self->{level}->{must}); |
90 |
## TODO: Test data... |
## TODO: Test data... |
91 |
} |
} |
92 |
}, |
}, |
95 |
my $value = $attr->value; |
my $value = $attr->value; |
96 |
if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters? |
if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters? |
97 |
$self->{onerror}->(node => $attr, |
$self->{onerror}->(node => $attr, |
98 |
type => 'invalid attribute value'); |
type => 'invalid attribute value', |
99 |
|
level => $self->{level}->{fact}, ## TODO: correct? |
100 |
|
); |
101 |
} |
} |
102 |
## NOTE: Conformance to URI standard is not checked since there is |
## NOTE: Conformance to URI standard is not checked since there is |
103 |
## no author requirement on conformance in the XML Base specification. |
## no author requirement on conformance in the XML Base specification. |
110 |
$value =~ s/\x20$//; |
$value =~ s/\x20$//; |
111 |
## TODO: NCName in XML 1.0 or 1.1 |
## TODO: NCName in XML 1.0 or 1.1 |
112 |
## TODO: declared type is ID? |
## TODO: declared type is ID? |
113 |
if ($self->{id}->{$value}) { ## NOTE: An xml:id error |
if ($self->{id}->{$value}) { |
114 |
$self->{onerror}->(node => $attr, level => 'error', |
$self->{onerror}->(node => $attr, |
115 |
type => 'duplicate ID'); |
type => 'duplicate ID', |
116 |
|
level => $self->{level}->{xml_id_error}); |
117 |
push @{$self->{id}->{$value}}, $attr; |
push @{$self->{id}->{$value}}, $attr; |
118 |
} else { |
} else { |
119 |
$self->{id}->{$value} = [$attr]; |
$self->{id}->{$value} = [$attr]; |
127 |
my $value = $attr->value; |
my $value = $attr->value; |
128 |
if ($value eq $XML_NS and $ln ne 'xml') { |
if ($value eq $XML_NS and $ln ne 'xml') { |
129 |
$self->{onerror} |
$self->{onerror} |
130 |
->(node => $attr, level => 'NC', |
->(node => $attr, |
131 |
type => 'Reserved Prefixes and Namespace Names:=xml'); |
type => 'Reserved Prefixes and Namespace Names:Name', |
132 |
|
text => $value, |
133 |
|
level => $self->{level}->{nc}); |
134 |
} elsif ($value eq $XMLNS_NS) { |
} elsif ($value eq $XMLNS_NS) { |
135 |
$self->{onerror} |
$self->{onerror} |
136 |
->(node => $attr, level => 'NC', |
->(node => $attr, |
137 |
type => 'Reserved Prefixes and Namespace Names:=xmlns'); |
type => 'Reserved Prefixes and Namespace Names:Name', |
138 |
|
text => $value, |
139 |
|
level => $self->{level}->{nc}); |
140 |
} |
} |
141 |
if ($ln eq 'xml' and $value ne $XML_NS) { |
if ($ln eq 'xml' and $value ne $XML_NS) { |
142 |
$self->{onerror} |
$self->{onerror} |
143 |
->(node => $attr, level => 'NC', |
->(node => $attr, |
144 |
type => 'Reserved Prefixes and Namespace Names:xmlns:xml='); |
type => 'Reserved Prefixes and Namespace Names:Prefix', |
145 |
|
text => $ln, |
146 |
|
level => $self->{level}->{nc}); |
147 |
} elsif ($ln eq 'xmlns') { |
} elsif ($ln eq 'xmlns') { |
148 |
$self->{onerror} |
$self->{onerror} |
149 |
->(node => $attr, level => 'NC', |
->(node => $attr, |
150 |
type => 'Reserved Prefixes and Namespace Names:xmlns:xmlns='); |
type => 'Reserved Prefixes and Namespace Names:Prefix', |
151 |
|
text => $ln, |
152 |
|
level => $self->{level}->{nc}); |
153 |
} |
} |
154 |
## TODO: If XML 1.0 and empty |
## TODO: If XML 1.0 and empty |
155 |
}, |
}, |
161 |
my $value = $attr->value; |
my $value = $attr->value; |
162 |
if ($value eq $XML_NS) { |
if ($value eq $XML_NS) { |
163 |
$self->{onerror} |
$self->{onerror} |
164 |
->(node => $attr, level => 'NC', |
->(node => $attr, |
165 |
type => 'Reserved Prefixes and Namespace Names:=xml'); |
type => 'Reserved Prefixes and Namespace Names:Name', |
166 |
|
text => $value, |
167 |
|
level => $self->{level}->{nc}); |
168 |
} elsif ($value eq $XMLNS_NS) { |
} elsif ($value eq $XMLNS_NS) { |
169 |
$self->{onerror} |
$self->{onerror} |
170 |
->(node => $attr, level => 'NC', |
->(node => $attr, |
171 |
type => 'Reserved Prefixes and Namespace Names:=xmlns'); |
type => 'Reserved Prefixes and Namespace Names:Name', |
172 |
|
text => $value, |
173 |
|
level => $self->{level}->{nc}); |
174 |
} |
} |
175 |
}, |
}, |
176 |
}, |
}, |
226 |
if ($checker) { |
if ($checker) { |
227 |
$checker->($self, $attr); |
$checker->($self, $attr); |
228 |
} else { |
} else { |
229 |
$self->{onerror}->(node => $attr, level => 'unsupported', |
$self->{onerror}->(node => $attr, |
230 |
type => 'attribute'); |
type => 'unknown attribute', |
231 |
|
level => $self->{level}->{uncertain}); |
232 |
} |
} |
233 |
$self->_attr_status_info ($attr, $status); |
$self->_attr_status_info ($attr, $status); |
234 |
} |
} |
239 |
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
240 |
$self->{onerror}->(node => $child_el, |
$self->{onerror}->(node => $child_el, |
241 |
type => 'element not allowed:minus', |
type => 'element not allowed:minus', |
242 |
level => $self->{must_level}); |
level => $self->{level}->{must}); |
243 |
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
244 |
# |
# |
245 |
} else { |
} else { |
262 |
## NOTE: No "element not defined" error - it is not supported anyway. |
## NOTE: No "element not defined" error - it is not supported anyway. |
263 |
check_start => sub { |
check_start => sub { |
264 |
my ($self, $item, $element_state) = @_; |
my ($self, $item, $element_state) = @_; |
265 |
$self->{onerror}->(node => $item->{node}, level => 'unsupported', |
$self->{onerror}->(node => $item->{node}, |
266 |
type => 'element'); |
type => 'unknown element', |
267 |
|
level => $self->{level}->{uncertain}); |
268 |
}, |
}, |
269 |
}; |
}; |
270 |
|
|
335 |
}, |
}, |
336 |
}; |
}; |
337 |
|
|
338 |
|
my $default_error_level = { |
339 |
|
must => 'm', |
340 |
|
should => 's', |
341 |
|
warn => 'w', |
342 |
|
good => 'w', |
343 |
|
info => 'i', |
344 |
|
uncertain => 'u', |
345 |
|
|
346 |
|
fact => 'm', |
347 |
|
xml_error => 'm', ## TODO: correct? |
348 |
|
nc => 'm', ## XML Namespace Constraints ## TODO: correct? |
349 |
|
}; |
350 |
|
|
351 |
sub check_document ($$$;$) { |
sub check_document ($$$;$) { |
352 |
my ($self, $doc, $onerror, $onsubdoc) = @_; |
my ($self, $doc, $onerror, $onsubdoc) = @_; |
353 |
$self = bless {}, $self unless ref $self; |
$self = bless {}, $self unless ref $self; |
356 |
warn "A subdocument is not conformance-checked"; |
warn "A subdocument is not conformance-checked"; |
357 |
}; |
}; |
358 |
|
|
359 |
$self->{must_level} = 'm'; |
$self->{level} ||= $default_error_level; |
|
$self->{fact_level} = 'm'; |
|
|
$self->{should_level} = 's'; |
|
|
$self->{good_level} = 'w'; |
|
|
$self->{info_level} = 'i'; |
|
|
$self->{unsupported_level} = 'u'; |
|
360 |
|
|
361 |
## TODO: If application/rdf+xml, RDF/XML mode should be invoked. |
## TODO: If application/rdf+xml, RDF/XML mode should be invoked. |
362 |
|
|
363 |
my $docel = $doc->document_element; |
my $docel = $doc->document_element; |
364 |
unless (defined $docel) { |
unless (defined $docel) { |
365 |
## ISSUE: Should we check content of Document node? |
## ISSUE: Should we check content of Document node? |
366 |
$onerror->(node => $doc, type => 'no document element'); |
$onerror->(node => $doc, type => 'no document element', |
367 |
|
level => $self->{level}->{must}); |
368 |
## ISSUE: Is this non-conforming (to what spec)? Or just a warning? |
## ISSUE: Is this non-conforming (to what spec)? Or just a warning? |
369 |
return { |
return { |
370 |
class => {}, |
class => {}, |
386 |
unless ($doc->manakai_is_html) { |
unless ($doc->manakai_is_html) { |
387 |
# |
# |
388 |
} else { |
} else { |
389 |
$onerror->(node => $docel, type => 'element not allowed:root:xml'); |
$onerror->(node => $docel, type => 'element not allowed:root:xml', |
390 |
|
level => $self->{level}->{must}); |
391 |
} |
} |
392 |
} else { |
} else { |
393 |
$onerror->(node => $docel, type => 'element not allowed:root'); |
$onerror->(node => $docel, type => 'element not allowed:root', |
394 |
|
level => $self->{level}->{must}); |
395 |
} |
} |
396 |
|
|
397 |
## TODO: Check for other items other than document element |
## TODO: Check for other items other than document element |
409 |
if (not $doc->manakai_has_bom and |
if (not $doc->manakai_has_bom and |
410 |
not defined $doc->manakai_charset) { |
not defined $doc->manakai_charset) { |
411 |
unless ($charset->{is_html_ascii_superset}) { |
unless ($charset->{is_html_ascii_superset}) { |
412 |
$onerror->(node => $doc, level => $self->{must_level}, |
$onerror->(node => $doc, level => $self->{level}->{must}, |
413 |
type => 'non ascii superset:'.$charset_name); |
type => 'non ascii superset', |
414 |
|
text => $charset_name); |
415 |
} |
} |
416 |
|
|
417 |
if (not $self->{has_charset} and ## TODO: This does not work now. |
if (not $self->{has_charset} and ## TODO: This does not work now. |
418 |
not $charset->{iana_names}->{'us-ascii'}) { |
not $charset->{iana_names}->{'us-ascii'}) { |
419 |
$onerror->(node => $doc, level => $self->{must_level}, |
$onerror->(node => $doc, level => $self->{level}->{must}, |
420 |
type => 'no character encoding declaration:'.$charset_name); |
type => 'no character encoding declaration', |
421 |
|
text => $charset_name); |
422 |
} |
} |
423 |
} |
} |
424 |
|
|
429 |
$charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE? |
$charset->{iana_names}->{'utf-32'} or ## ISSUE: UTF-32BE? UTF-32LE? |
430 |
$charset->{is_ebcdic_based}) { |
$charset->{is_ebcdic_based}) { |
431 |
$onerror->(node => $doc, |
$onerror->(node => $doc, |
432 |
type => 'character encoding:'.$charset_name, |
type => 'bad character encoding', |
433 |
level => $self->{should_level}); |
text => $charset_name, |
434 |
|
level => $self->{level}->{should}, |
435 |
|
layer => 'encode'); |
436 |
} elsif ($charset->{iana_names}->{'cesu-8'} or |
} elsif ($charset->{iana_names}->{'cesu-8'} or |
437 |
$charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7? |
$charset->{iana_names}->{'utf-8'} or ## ISSUE: UNICODE-1-1-UTF-7? |
438 |
$charset->{iana_names}->{'bocu-1'} or |
$charset->{iana_names}->{'bocu-1'} or |
439 |
$charset->{iana_names}->{'scsu'}) { |
$charset->{iana_names}->{'scsu'}) { |
440 |
$onerror->(node => $doc, |
$onerror->(node => $doc, |
441 |
type => 'character encoding:'.$charset_name, |
type => 'disallowed character encoding', |
442 |
level => $self->{must_level}); |
text => $charset_name, |
443 |
|
level => $self->{level}->{must}, |
444 |
|
layer => 'encode'); |
445 |
} else { |
} else { |
446 |
$onerror->(node => $doc, |
$onerror->(node => $doc, |
447 |
type => 'character encoding:'.$charset_name, |
type => 'non-utf-8 character encoding', |
448 |
level => $self->{good_level}); |
text => $charset_name, |
449 |
|
level => $self->{level}->{good}, |
450 |
|
layer => 'encode'); |
451 |
} |
} |
452 |
} |
} |
453 |
} elsif ($doc->manakai_is_html) { |
} elsif ($doc->manakai_is_html) { |
454 |
## NOTE: MUST and SHOULD requirements above cannot be tested, |
## NOTE: MUST and SHOULD requirements above cannot be tested, |
455 |
## since the document has no input charset encoding information. |
## since the document has no input charset encoding information. |
456 |
$onerror->(node => $doc, |
$onerror->(node => $doc, |
457 |
type => 'character encoding:', |
type => 'character encoding unchecked', |
458 |
level => 'unsupported'); |
level => $self->{level}->{info}, |
459 |
|
layer => 'encode'); |
460 |
} |
} |
461 |
|
|
462 |
return $return; |
return $return; |
472 |
warn "A subdocument is not conformance-checked"; |
warn "A subdocument is not conformance-checked"; |
473 |
}; |
}; |
474 |
|
|
475 |
$self->{must_level} = 'm'; |
$self->{level} ||= $default_error_level; |
|
$self->{fact_level} = 'm'; |
|
|
$self->{should_level} = 's'; |
|
|
$self->{good_level} = 'w'; |
|
|
$self->{info_level} = 'i'; |
|
|
$self->{unsupported_level} = 'u'; |
|
476 |
|
|
477 |
$self->{plus_elements} = {}; |
$self->{plus_elements} = {}; |
478 |
$self->{minus_elements} = {}; |
$self->{minus_elements} = {}; |
532 |
$eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard'; |
$eldef->{status} & FEATURE_STATUS_WD ? 'wd' : 'non-standard'; |
533 |
$self->{onerror}->(node => $item->{node}, |
$self->{onerror}->(node => $item->{node}, |
534 |
type => 'status:'.$status.':element', |
type => 'status:'.$status.':element', |
535 |
level => $self->{info_level}); |
level => $self->{level}->{info}); |
536 |
} |
} |
537 |
if (not ($eldef->{status} & FEATURE_ALLOWED)) { |
if (not ($eldef->{status} & FEATURE_ALLOWED)) { |
538 |
$self->{onerror}->(node => $item->{node}, |
$self->{onerror}->(node => $item->{node}, |
539 |
type => 'element not defined', |
type => 'element not defined', |
540 |
level => $self->{must_level}); |
level => $self->{level}->{must}); |
541 |
} elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) { |
} elsif ($eldef->{status} & FEATURE_DEPRECATED_SHOULD) { |
542 |
$self->{onerror}->(node => $item->{node}, |
$self->{onerror}->(node => $item->{node}, |
543 |
type => 'deprecated:element', |
type => 'deprecated:element', |
544 |
level => $self->{should_level}); |
level => $self->{level}->{should}); |
545 |
} elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) { |
} elsif ($eldef->{status} & FEATURE_DEPRECATED_INFO) { |
546 |
$self->{onerror}->(node => $item->{node}, |
$self->{onerror}->(node => $item->{node}, |
547 |
type => 'deprecated:element', |
type => 'deprecated:element', |
548 |
level => $self->{info_level}); |
level => $self->{level}->{info}); |
549 |
} |
} |
550 |
|
|
551 |
my @new_item; |
my @new_item; |
652 |
if ($el eq $_->[1]->owner_element) { |
if ($el eq $_->[1]->owner_element) { |
653 |
$self->{onerror}->(node => $_->[1], |
$self->{onerror}->(node => $_->[1], |
654 |
type => 'fragment points itself', |
type => 'fragment points itself', |
655 |
level => $self->{must_level}); |
level => $self->{level}->{must}); |
656 |
} |
} |
657 |
|
|
658 |
last F; |
last F; |
663 |
## if the fragment identifier identifies no element? |
## if the fragment identifier identifies no element? |
664 |
|
|
665 |
$self->{onerror}->(node => $_->[1], type => 'template:not template', |
$self->{onerror}->(node => $_->[1], type => 'template:not template', |
666 |
level => $self->{must_level}); |
level => $self->{level}->{must}); |
667 |
} # F |
} # F |
668 |
} |
} |
669 |
|
|
676 |
if ($self->{id}->{$_->[0]}->[0]->owner_element |
if ($self->{id}->{$_->[0]}->[0]->owner_element |
677 |
eq $_->[1]->owner_element) { |
eq $_->[1]->owner_element) { |
678 |
$self->{onerror}->(node => $_->[1], type => 'fragment points itself', |
$self->{onerror}->(node => $_->[1], type => 'fragment points itself', |
679 |
level => $self->{must_level}); |
level => $self->{level}->{must}); |
680 |
} |
} |
681 |
} else { |
} else { |
682 |
$self->{onerror}->(node => $_->[1], type => 'fragment points nothing', |
$self->{onerror}->(node => $_->[1], type => 'fragment points nothing', |
683 |
level => $self->{must_level}); |
level => $self->{level}->{must}); |
684 |
} |
} |
685 |
} |
} |
686 |
|
|
688 |
|
|
689 |
for (@{$self->{usemap}}) { |
for (@{$self->{usemap}}) { |
690 |
unless ($self->{map}->{$_->[0]}) { |
unless ($self->{map}->{$_->[0]}) { |
691 |
$self->{onerror}->(node => $_->[1], type => 'no referenced map'); |
$self->{onerror}->(node => $_->[1], type => 'no referenced map', |
692 |
|
level => $self->{level}->{must}); |
693 |
} |
} |
694 |
} |
} |
695 |
|
|
696 |
for (@{$self->{contextmenu}}) { |
for (@{$self->{contextmenu}}) { |
697 |
unless ($self->{menu}->{$_->[0]}) { |
unless ($self->{menu}->{$_->[0]}) { |
698 |
$self->{onerror}->(node => $_->[1], type => 'no referenced menu'); |
$self->{onerror}->(node => $_->[1], type => 'no referenced menu', |
699 |
|
level => $self->{level}->{must}); |
700 |
} |
} |
701 |
} |
} |
702 |
|
|
767 |
if (not ($status_code & FEATURE_ALLOWED)) { |
if (not ($status_code & FEATURE_ALLOWED)) { |
768 |
$self->{onerror}->(node => $attr, |
$self->{onerror}->(node => $attr, |
769 |
type => 'attribute not defined', |
type => 'attribute not defined', |
770 |
level => $self->{must_level}); |
level => $self->{level}->{must}); |
771 |
} elsif ($status_code & FEATURE_DEPRECATED_SHOULD) { |
} elsif ($status_code & FEATURE_DEPRECATED_SHOULD) { |
772 |
$self->{onerror}->(node => $attr, |
$self->{onerror}->(node => $attr, |
773 |
type => 'deprecated:attr', |
type => 'deprecated:attr', |
774 |
level => $self->{should_level}); |
level => $self->{level}->{should}); |
775 |
} elsif ($status_code & FEATURE_DEPRECATED_INFO) { |
} elsif ($status_code & FEATURE_DEPRECATED_INFO) { |
776 |
$self->{onerror}->(node => $attr, |
$self->{onerror}->(node => $attr, |
777 |
type => 'deprecated:attr', |
type => 'deprecated:attr', |
778 |
level => $self->{info_level}); |
level => $self->{level}->{info}); |
779 |
} |
} |
780 |
|
|
781 |
my $status; |
my $status; |
792 |
} |
} |
793 |
$self->{onerror}->(node => $attr, |
$self->{onerror}->(node => $attr, |
794 |
type => 'status:'.$status.':attr', |
type => 'status:'.$status.':attr', |
795 |
level => $self->{info_level}); |
level => $self->{level}->{info}); |
796 |
} # _attr_status_info |
} # _attr_status_info |
797 |
|
|
798 |
sub _add_minuses ($@) { |
sub _add_minuses ($@) { |