| 18 |
|
|
| 19 |
my $RDF_URI = q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>; |
my $RDF_URI = q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>; |
| 20 |
|
|
| 21 |
use Char::Class::XML qw(InXML_NCNameStartChar10 InXMLNameChar10); |
use Char::Class::XML qw(InXML_NCNameStartChar10 InXMLNCNameChar10); |
| 22 |
require Whatpm::URIChecker; |
require Whatpm::URIChecker; |
| 23 |
|
|
| 24 |
sub new ($) { |
sub new ($) { |
| 25 |
my $self = bless {fact_level => 'm', grammer_level => 'm', |
my $self = bless { |
| 26 |
info_level => 'i', next_id => 0}, shift; |
level => { |
| 27 |
|
rdf_fact => 'm', |
| 28 |
|
rdf_grammer => 'm', |
| 29 |
|
rdf_lc_must => 'm', |
| 30 |
|
info => 'i', |
| 31 |
|
}, |
| 32 |
|
next_id => 0, |
| 33 |
|
}, shift; |
| 34 |
$self->{onerror} = sub { |
$self->{onerror} = sub { |
| 35 |
my %opt = @_; |
my %opt = @_; |
| 36 |
warn $opt{type}, "\n"; |
warn $opt{type}, "\n"; |
| 97 |
$has_element = 1; |
$has_element = 1; |
| 98 |
} else { |
} else { |
| 99 |
$self->{onerror}->(type => 'second node element', |
$self->{onerror}->(type => 'second node element', |
| 100 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 101 |
node => $cn); |
node => $cn); |
| 102 |
} |
} |
| 103 |
} elsif ($cn->node_type == $cn->TEXT_NODE or |
} elsif ($cn->node_type == $cn->TEXT_NODE or |
| 104 |
$cn->node_type == $cn->CDATA_SECTION_NODE) { |
$cn->node_type == $cn->CDATA_SECTION_NODE) { |
| 105 |
$self->{onerror}->(type => 'character not allowed', |
$self->{onerror}->(type => 'character not allowed', |
| 106 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 107 |
node => $cn); |
node => $cn); |
| 108 |
} |
} |
| 109 |
} |
} |
| 117 |
if (substr ($node_nsuri, 0, length $RDF_URI) eq $RDF_URI and |
if (substr ($node_nsuri, 0, length $RDF_URI) eq $RDF_URI and |
| 118 |
length $RDF_URI < length $node_nsuri) { |
length $RDF_URI < length $node_nsuri) { |
| 119 |
$self->{onerror}->(type => 'bad rdf namespace', |
$self->{onerror}->(type => 'bad rdf namespace', |
| 120 |
level => $self->{fact_level}, # Section 5.1 |
level => $self->{level}->{rdf_fact}, # Section 5.1 |
| 121 |
node => $node); |
node => $node); |
| 122 |
} |
} |
| 123 |
}; # $check_rdf_namespace |
}; # $check_rdf_namespace |
| 149 |
|
|
| 150 |
$check_rdf_namespace->($self, $attr); |
$check_rdf_namespace->($self, $attr); |
| 151 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 152 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 153 |
node => $attr); |
node => $attr); |
| 154 |
} |
} |
| 155 |
|
|
| 161 |
$cn->node_type == $cn->CDATA_SECTION_NODE) { |
$cn->node_type == $cn->CDATA_SECTION_NODE) { |
| 162 |
if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) { |
if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) { |
| 163 |
$self->{onerror}->(type => 'character not allowed', |
$self->{onerror}->(type => 'character not allowed', |
| 164 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 165 |
node => $cn); |
node => $cn); |
| 166 |
} |
} |
| 167 |
} |
} |
| 210 |
my $abs_uri = $resolve->($attr->value, $attr); |
my $abs_uri = $resolve->($attr->value, $attr); |
| 211 |
|
|
| 212 |
Whatpm::URIChecker->check_iri_reference ($abs_uri, sub { |
Whatpm::URIChecker->check_iri_reference ($abs_uri, sub { |
| 213 |
my %opt = @_; |
$self->{onerror}->(@_, node => $attr); |
|
$self->{onerror}->(node => $attr, level => $opt{level}, |
|
|
type => 'URI::'.$opt{type}. |
|
|
(defined $opt{position} ? ':'.$opt{position} : '')); |
|
| 214 |
}); |
}); |
| 215 |
|
|
| 216 |
return $abs_uri; |
return $abs_uri; |
| 221 |
|
|
| 222 |
my $id = $attr->value; |
my $id = $attr->value; |
| 223 |
unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) { |
unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) { |
| 224 |
$self->{onerror}->(type => 'syntax error', ## TODO: type |
$self->{onerror}->(type => 'XML 1.0 NCName:syntax error', |
| 225 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 226 |
node => $attr); |
node => $attr); |
| 227 |
} |
} |
| 228 |
|
|
| 229 |
my $base_uri = $attr->base_uri; |
my $base_uri = $attr->base_uri; |
| 230 |
if ($self->{id}->{$base_uri}->{$id}) { |
if ($self->{id}->{$base_uri}->{$id}) { |
| 231 |
$self->{onerror}->(type => 'duplicate rdf id', ## TODO: type |
$self->{onerror}->(type => 'duplicate rdf id', |
| 232 |
level => $self->{small_must_level}, |
level => $self->{level}->{rdf_lc_must}, |
| 233 |
node => $attr); |
node => $attr); |
| 234 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 235 |
} else { |
} else { |
| 245 |
if ({ |
if ({ |
| 246 |
ID => 1, about => 1, resource => 1, parseType => 1, type => 1, |
ID => 1, about => 1, resource => 1, parseType => 1, type => 1, |
| 247 |
}->{$attr_xuri}) { |
}->{$attr_xuri}) { |
| 248 |
$self->{onerror}->(type => 'unqualified rdf attr', ## TODO: type |
$self->{onerror}->(type => 'unqualified rdf attr', |
| 249 |
level => $self->{should_level}, |
level => $self->{level}->{should}, |
| 250 |
node => $attr); |
node => $attr); |
| 251 |
if ($node->has_attribute_ns ($RDF_URI, $attr_xuri)) { |
if ($node->has_attribute_ns ($RDF_URI, $attr_xuri)) { |
| 252 |
$self->{onerror}->(type => 'duplicate unqualified attr',## TODO: type |
$self->{onerror}->(type => 'duplicate unqualified attr', |
| 253 |
level => $self->{fact_level}, |
level => $self->{level}->{rdf_fact}, |
| 254 |
node => $attr); |
node => $attr); |
| 255 |
## NOTE: <? rdfa:bout="" about=""> and such are not catched |
## NOTE: <? rdfa:bout="" about=""> and such are not catched |
| 256 |
## by this check; but who cares? rdfa:bout="" is itself illegal. |
## by this check; but who cares? rdfa:bout="" is itself illegal. |
| 257 |
} |
} |
| 258 |
$attr_xuri = $RDF_URI . $attr_xuri; |
$attr_xuri = $RDF_URI . $attr_xuri; |
| 259 |
} else { |
} else { |
| 260 |
$self->{onerror}->(type => 'unqualified attr', ## TODO: type |
$self->{onerror}->(type => 'unqualified attr', |
| 261 |
level => $self->{fact_level}, |
level => $self->{level}->{rdf_fact}, |
| 262 |
node => $attr); |
node => $attr); |
| 263 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 264 |
} |
} |
| 283 |
%oldTerms, |
%oldTerms, |
| 284 |
}->{$xuri}) { |
}->{$xuri}) { |
| 285 |
$self->{onerror}->(type => 'element not allowed', |
$self->{onerror}->(type => 'element not allowed', |
| 286 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 287 |
node => $node); |
node => $node); |
| 288 |
|
|
| 289 |
## TODO: W3C RDF Validator: Continue validation, but triples that would |
## TODO: W3C RDF Validator: Continue validation, but triples that would |
| 322 |
$subject = {uri => $id_attr->($self, $attr)}; |
$subject = {uri => $id_attr->($self, $attr)}; |
| 323 |
} else { |
} else { |
| 324 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 325 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 326 |
node => $attr); |
node => $attr); |
| 327 |
|
|
| 328 |
## TODO: Ignore triple as W3C RDF Validator does |
## TODO: Ignore triple as W3C RDF Validator does |
| 331 |
unless (defined $subject) { |
unless (defined $subject) { |
| 332 |
my $id = $attr->value; |
my $id = $attr->value; |
| 333 |
unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) { |
unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) { |
| 334 |
$self->{onerror}->(type => 'syntax error', ## TODO: type |
$self->{onerror}->(type => 'XML 1.0 NCName:syntax error', |
| 335 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 336 |
node => $self); |
node => $self); |
| 337 |
} |
} |
| 338 |
|
|
| 339 |
$subject = {bnodeid => $get_bnodeid->($id)}; |
$subject = {bnodeid => $get_bnodeid->($id)}; |
| 340 |
} else { |
} else { |
| 341 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 342 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 343 |
node => $attr); |
node => $attr); |
| 344 |
|
|
| 345 |
## TODO: Ignore triple as W3C RDF Validator does |
## TODO: Ignore triple as W3C RDF Validator does |
| 349 |
$subject = {uri => $uri_attr->($self, $attr)}; |
$subject = {uri => $uri_attr->($self, $attr)}; |
| 350 |
} else { |
} else { |
| 351 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 352 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 353 |
node => $attr); |
node => $attr); |
| 354 |
|
|
| 355 |
## TODO: Ignore triple as W3C RDF Validator does |
## TODO: Ignore triple as W3C RDF Validator does |
| 363 |
%oldTerms, |
%oldTerms, |
| 364 |
}->{$attr_xuri}) { |
}->{$attr_xuri}) { |
| 365 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 366 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 367 |
node => $attr); |
node => $attr); |
| 368 |
|
|
| 369 |
## TODO: W3C RDF Validator: Ignore triples |
## TODO: W3C RDF Validator: Ignore triples |
| 413 |
$cn_type == $cn->CDATA_SECTION_NODE) { |
$cn_type == $cn->CDATA_SECTION_NODE) { |
| 414 |
if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) { |
if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) { |
| 415 |
$self->{onerror}->(type => 'character not allowed', |
$self->{onerror}->(type => 'character not allowed', |
| 416 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 417 |
node => $cn); |
node => $cn); |
| 418 |
} |
} |
| 419 |
} |
} |
| 449 |
%oldTerms, |
%oldTerms, |
| 450 |
}->{$xuri}) { |
}->{$xuri}) { |
| 451 |
$self->{onerror}->(type => 'element not allowed', |
$self->{onerror}->(type => 'element not allowed', |
| 452 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 453 |
node => $node); |
node => $node); |
| 454 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 455 |
} |
} |
| 500 |
%oldTerms, |
%oldTerms, |
| 501 |
}->{$attr_xuri}) { |
}->{$attr_xuri}) { |
| 502 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 503 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 504 |
node => $attr); |
node => $attr); |
| 505 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 506 |
} else { |
} else { |
| 515 |
for my $attr ($resource_attr, $nodeid_attr, $dt_attr) { |
for my $attr ($resource_attr, $nodeid_attr, $dt_attr) { |
| 516 |
next unless $attr; |
next unless $attr; |
| 517 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 518 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 519 |
node => $attr); |
node => $attr); |
| 520 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 521 |
} |
} |
| 542 |
$cn_type == $cn->CDATA_SECTION_NODE) { |
$cn_type == $cn->CDATA_SECTION_NODE) { |
| 543 |
if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) { |
if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) { |
| 544 |
$self->{onerror}->(type => 'character not allowed', |
$self->{onerror}->(type => 'character not allowed', |
| 545 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 546 |
node => $cn); |
node => $cn); |
| 547 |
} |
} |
| 548 |
} |
} |
| 553 |
for my $attr ($resource_attr, $nodeid_attr, $dt_attr) { |
for my $attr ($resource_attr, $nodeid_attr, $dt_attr) { |
| 554 |
next unless $attr; |
next unless $attr; |
| 555 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 556 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 557 |
node => $attr); |
node => $attr); |
| 558 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 559 |
} |
} |
| 569 |
$cn->node_type == $cn->CDATA_SECTION_NODE) { |
$cn->node_type == $cn->CDATA_SECTION_NODE) { |
| 570 |
if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) { |
if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) { |
| 571 |
$self->{onerror}->(type => 'character not allowed', |
$self->{onerror}->(type => 'character not allowed', |
| 572 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 573 |
node => $cn); |
node => $cn); |
| 574 |
} |
} |
| 575 |
} |
} |
| 613 |
# |parseTypeOtherPropertyElt| ## TODO: What RDF Validator does? |
# |parseTypeOtherPropertyElt| ## TODO: What RDF Validator does? |
| 614 |
|
|
| 615 |
$self->{onerror}->(type => 'parse type other', |
$self->{onerror}->(type => 'parse type other', |
| 616 |
level => $self->{info_level}, |
level => $self->{level}->{rdf_info}, |
| 617 |
node => $parse_attr); |
node => $parse_attr); |
| 618 |
} |
} |
| 619 |
|
|
| 620 |
for my $attr ($resource_attr, $nodeid_attr, $dt_attr) { |
for my $attr ($resource_attr, $nodeid_attr, $dt_attr) { |
| 621 |
next unless $attr; |
next unless $attr; |
| 622 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 623 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 624 |
node => $attr); |
node => $attr); |
| 625 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 626 |
} |
} |
| 657 |
$mode = 'resource'; |
$mode = 'resource'; |
| 658 |
} else { |
} else { |
| 659 |
$self->{onerror}->(type => 'element not allowed', |
$self->{onerror}->(type => 'element not allowed', |
| 660 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 661 |
node => $cn); |
node => $cn); |
| 662 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 663 |
} |
} |
| 664 |
} else { |
} else { |
| 665 |
## TODO: What RDF Validator does? |
## TODO: What RDF Validator does? |
| 666 |
$self->{onerror}->(type => 'second node element', |
$self->{onerror}->(type => 'second node element', |
| 667 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 668 |
node => $cn); |
node => $cn); |
| 669 |
} |
} |
| 670 |
} elsif ($cn_type == $cn->TEXT_NODE or |
} elsif ($cn_type == $cn->TEXT_NODE or |
| 678 |
$mode = 'literal'; |
$mode = 'literal'; |
| 679 |
} else { |
} else { |
| 680 |
$self->{onerror}->(type => 'character not allowed', |
$self->{onerror}->(type => 'character not allowed', |
| 681 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 682 |
node => $cn); |
node => $cn); |
| 683 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 684 |
} |
} |
| 698 |
for my $attr (@prop_attr, $resource_attr, $nodeid_attr, $dt_attr) { |
for my $attr (@prop_attr, $resource_attr, $nodeid_attr, $dt_attr) { |
| 699 |
next unless $attr; |
next unless $attr; |
| 700 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 701 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 702 |
node => $attr); |
node => $attr); |
| 703 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 704 |
} |
} |
| 717 |
for my $attr (@prop_attr, $resource_attr, $nodeid_attr) { |
for my $attr (@prop_attr, $resource_attr, $nodeid_attr) { |
| 718 |
next unless $attr; |
next unless $attr; |
| 719 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 720 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 721 |
node => $attr); |
node => $attr); |
| 722 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 723 |
} |
} |
| 748 |
for my $attr ($dt_attr) { |
for my $attr ($dt_attr) { |
| 749 |
next unless $attr; |
next unless $attr; |
| 750 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 751 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 752 |
node => $attr); |
node => $attr); |
| 753 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 754 |
} |
} |
| 766 |
$object = {uri => $uri_attr->($self, $resource_attr)}; |
$object = {uri => $uri_attr->($self, $resource_attr)}; |
| 767 |
if (defined $nodeid_attr) { |
if (defined $nodeid_attr) { |
| 768 |
$self->{onerror}->(type => 'attribute not allowed', |
$self->{onerror}->(type => 'attribute not allowed', |
| 769 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 770 |
node => $nodeid_attr); |
node => $nodeid_attr); |
| 771 |
## TODO: RDF Validator? |
## TODO: RDF Validator? |
| 772 |
} |
} |
| 773 |
} elsif ($nodeid_attr) { |
} elsif ($nodeid_attr) { |
| 774 |
my $id = $nodeid_attr->value; |
my $id = $nodeid_attr->value; |
| 775 |
unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) { |
unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) { |
| 776 |
$self->{onerror}->(type => 'syntax error', ## TODO: type |
$self->{onerror}->(type => 'XML 1.0 NCName:syntax error', |
| 777 |
level => $self->{grammer_level}, |
level => $self->{level}->{rdf_grammer}, |
| 778 |
node => $self); |
node => $self); |
| 779 |
} |
} |
| 780 |
$object = {bnodeid => $get_bnodeid->($id)}; |
$object = {bnodeid => $get_bnodeid->($id)}; |