/[suikacvs]/markup/html/whatpm/Whatpm/RDFXML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/RDFXML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download)
Fri Aug 15 14:13:42 2008 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.8: +50 -46 lines
++ whatpm/Whatpm/ChangeLog	15 Aug 2008 14:12:36 -0000
	* ContentChecker.pm: Support for RDF/XML error levels.

	* HTMLTable.pm, RDFXML.pm: Support for new style of error level
	specifying.  Error types are revised.

2008-08-15  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ContentChecker/ChangeLog	15 Aug 2008 14:13:40 -0000
	* HTML.pm: Mark HTML4's "fact"-level errors as such.  Typo fixed.

2008-08-15  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::RDFXML;
2     use strict;
3    
4     ## NOTE: EntityReference nodes are not supported except that
5     ## unexpected entity reference are simply ignored. (In fact
6     ## all EntityRefernce nodes are ignored.)
7    
8 wakaba 1.2 ## TODO: Add a callback function invoked for every element
9     ## when XMLCC is implemented in WDCC.
10    
11     ## ISSUE: <html:nest/> in RDF subtree?
12    
13     ## ISSUE: PIs in RDF subtree should be validated?
14    
15 wakaba 1.5 ## TODO: Should we validate expanded URI created from QName?
16    
17     ## TODO: elements in null namespace (not mentioned in the spec.)
18    
19 wakaba 1.1 my $RDF_URI = q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>;
20    
21 wakaba 1.8 use Char::Class::XML qw(InXML_NCNameStartChar10 InXMLNCNameChar10);
22 wakaba 1.5 require Whatpm::URIChecker;
23    
24 wakaba 1.1 sub new ($) {
25 wakaba 1.9 my $self = bless {
26     level => {
27     rdf_fact => 'm',
28     rdf_grammer => 'm',
29     rdf_lc_must => 'm',
30     info => 'i',
31     },
32     next_id => 0,
33     }, shift;
34 wakaba 1.1 $self->{onerror} = sub {
35     my %opt = @_;
36     warn $opt{type}, "\n";
37     };
38     $self->{ontriple} = sub {
39     my %opt = @_;
40     my $dump_resource = sub {
41     my $resource = shift;
42 wakaba 1.3 if (defined $resource->{uri}) {
43 wakaba 1.1 return '<' . $resource->{uri} . '>';
44 wakaba 1.3 } elsif (defined $resource->{bnodeid}) {
45 wakaba 1.1 return '_:' . $resource->{bnodeid};
46     } elsif ($resource->{nodes}) {
47     return '"' . join ('', map {$_->inner_html} @{$resource->{nodes}}) .
48     '"^^<' . $resource->{datatype} . '>';
49     } elsif (defined $resource->{value}) {
50     return '"' . $resource->{value} . '"' .
51     (defined $resource->{datatype}
52     ? '^^<' . $resource->{datatype} . '>'
53     : '@' . $resource->{language});
54     } else {
55     return '??';
56     }
57     };
58     print STDERR $dump_resource->($opt{subject}) . ' ';
59     print STDERR $dump_resource->($opt{predicate}) . ' ';
60     print STDERR $dump_resource->($opt{object}) . "\n";
61 wakaba 1.3 if ($dump_resource->{id}) {
62     print STDERR $dump_resource->($dump_resource->{id}) . ' ';
63     print STDERR $dump_resource->({uri => $RDF_URI . 'subject'}) . ' ';
64     print STDERR $dump_resource->($opt{subject}) . "\n";
65     print STDERR $dump_resource->($dump_resource->{id}) . ' ';
66     print STDERR $dump_resource->({uri => $RDF_URI . 'predicate'}) . ' ';
67     print STDERR $dump_resource->($opt{predicate}) . "\n";
68     print STDERR $dump_resource->($dump_resource->{id}) . ' ';
69     print STDERR $dump_resource->({uri => $RDF_URI . 'object'}) . ' ';
70     print STDERR $dump_resource->($opt{object}) . "\n";
71     print STDERR $dump_resource->($dump_resource->{id}) . ' ';
72     print STDERR $dump_resource->({uri => $RDF_URI . 'type'}) . ' ';
73     print STDERR $dump_resource->({uri => $RDF_URI . 'Statement'}) . "\n";
74     }
75 wakaba 1.1 };
76     return $self;
77     } # new
78    
79     sub convert_document ($$) {
80     my $self = shift;
81     my $node = shift; # Document
82    
83     ## ISSUE: An RDF/XML document, either |doc| or |nodeElement|
84     ## is allowed as a starting production. However, |nodeElement|
85     ## is not a Root Event.
86    
87     my $has_element;
88    
89     for my $cn (@{$node->child_nodes}) {
90     if ($cn->node_type == $cn->ELEMENT_NODE) {
91     unless ($has_element) {
92     if ($cn->manakai_expanded_uri eq $RDF_URI . q<RDF>) {
93 wakaba 1.7 $self->convert_rdf_element ($cn, language => '');
94 wakaba 1.1 } else {
95 wakaba 1.7 $self->convert_rdf_node_element ($cn, language => '');
96 wakaba 1.1 }
97     $has_element = 1;
98     } else {
99     $self->{onerror}->(type => 'second node element',
100 wakaba 1.9 level => $self->{level}->{rdf_grammer},
101 wakaba 1.1 node => $cn);
102     }
103     } elsif ($cn->node_type == $cn->TEXT_NODE or
104     $cn->node_type == $cn->CDATA_SECTION_NODE) {
105     $self->{onerror}->(type => 'character not allowed',
106 wakaba 1.9 level => $self->{level}->{rdf_grammer},
107 wakaba 1.1 node => $cn);
108     }
109     }
110     } # convert_document
111    
112     my $check_rdf_namespace = sub {
113     my $self = shift;
114     my $node = shift;
115     my $node_nsuri = $node->namespace_uri;
116     return unless defined $node_nsuri;
117     if (substr ($node_nsuri, 0, length $RDF_URI) eq $RDF_URI and
118     length $RDF_URI < length $node_nsuri) {
119     $self->{onerror}->(type => 'bad rdf namespace',
120 wakaba 1.9 level => $self->{level}->{rdf_fact}, # Section 5.1
121 wakaba 1.1 node => $node);
122     }
123     }; # $check_rdf_namespace
124    
125 wakaba 1.7 sub convert_rdf_element ($$%) {
126     my ($self, $node, %opt) = @_;
127     $opt{language} = '' unless defined $opt{language};
128     ## ISSUE: Not explicitly defined in the spec.
129 wakaba 1.1
130     $check_rdf_namespace->($self, $node);
131    
132     # |RDF|
133    
134     for my $attr (@{$node->attributes}) {
135 wakaba 1.7 my $nsuri = $attr->namespace_uri;
136     if (defined $nsuri and
137     $nsuri eq q<http://www.w3.org/XML/1998/namespace> and
138     $attr->manakai_local_name eq 'lang') {
139     $opt{language} = $attr->value;
140     next;
141     }
142    
143 wakaba 1.1 my $prefix = $attr->prefix;
144     if (defined $prefix) {
145     next if $prefix =~ /^[Xx][Mm][Ll]/;
146     } else {
147     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
148     }
149    
150     $check_rdf_namespace->($self, $attr);
151     $self->{onerror}->(type => 'attribute not allowed',
152 wakaba 1.9 level => $self->{level}->{rdf_grammer},
153 wakaba 1.1 node => $attr);
154     }
155    
156     # |nodeElementList|
157     for my $cn (@{$node->child_nodes}) {
158     if ($cn->node_type == $cn->ELEMENT_NODE) {
159 wakaba 1.7 $self->convert_node_element ($cn, language => $opt{language});
160 wakaba 1.1 } elsif ($cn->node_type == $cn->TEXT_NODE or
161     $cn->node_type == $cn->CDATA_SECTION_NODE) {
162     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
163     $self->{onerror}->(type => 'character not allowed',
164 wakaba 1.9 level => $self->{level}->{rdf_grammer},
165 wakaba 1.1 node => $cn);
166     }
167     }
168     }
169     } # convert_rdf_element
170    
171     my %coreSyntaxTerms = (
172     $RDF_URI . 'RDF' => 1,
173     $RDF_URI . 'ID' => 1,
174     $RDF_URI . 'about' => 1,
175     $RDF_URI . 'parseType' => 1,
176     $RDF_URI . 'resource' => 1,
177     $RDF_URI . 'nodeID' => 1,
178     $RDF_URI . 'datatype' => 1,
179     );
180    
181     my %oldTerms = (
182     $RDF_URI . 'aboutEach' => 1,
183     $RDF_URI . 'aboutEachPrefix' => 1,
184     $RDF_URI . 'bagID' => 1,
185     );
186    
187 wakaba 1.4 require Message::DOM::DOMImplementation;
188     my $resolve = sub {
189     return Message::DOM::DOMImplementation->create_uri_reference ($_[0])
190     ->get_absolute_reference ($_[1]->base_uri)
191     ->uri_reference;
192    
193 wakaba 1.5 ## TODO: Ummm... RDF/XML spec refers dated version of xml:base and RFC 2396...
194    
195 wakaba 1.4 ## TODO: Check latest xml:base and IRI spec...
196     ## (non IRI/URI chars should be percent-encoded before resolve?)
197     }; # $resolve
198    
199     my $generate_bnodeid = sub {
200     return 'g'.$_[0]->{next_id}++;
201     }; # $generate_bnodeid
202    
203     my $get_bnodeid = sub {
204     return 'b'.$_[0];
205     }; # $get_bnodeid
206    
207 wakaba 1.5 my $uri_attr = sub {
208     my ($self, $attr) = @_;
209    
210     my $abs_uri = $resolve->($attr->value, $attr);
211    
212     Whatpm::URIChecker->check_iri_reference ($abs_uri, sub {
213 wakaba 1.9 $self->{onerror}->(@_, node => $attr);
214 wakaba 1.5 });
215    
216     return $abs_uri;
217 wakaba 1.6 }; # $uri_attr
218    
219     my $id_attr = sub {
220     my ($self, $attr) = @_;
221    
222     my $id = $attr->value;
223     unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
224 wakaba 1.9 $self->{onerror}->(type => 'XML 1.0 NCName:syntax error',
225     level => $self->{level}->{rdf_grammer},
226 wakaba 1.6 node => $attr);
227     }
228 wakaba 1.7
229     my $base_uri = $attr->base_uri;
230     if ($self->{id}->{$base_uri}->{$id}) {
231 wakaba 1.9 $self->{onerror}->(type => 'duplicate rdf id',
232     level => $self->{level}->{rdf_lc_must},
233 wakaba 1.7 node => $attr);
234     ## TODO: RDF Validator?
235     } else {
236     $self->{id}->{$base_uri}->{$id} = 1;
237     }
238 wakaba 1.6
239     return $resolve->('#' . $id, $attr);
240 wakaba 1.7 }; # $id_attr
241 wakaba 1.5
242 wakaba 1.7 my $check_local_attr = sub {
243     my ($self, $node, $attr, $attr_xuri) = @_;
244    
245     if ({
246     ID => 1, about => 1, resource => 1, parseType => 1, type => 1,
247     }->{$attr_xuri}) {
248 wakaba 1.9 $self->{onerror}->(type => 'unqualified rdf attr',
249     level => $self->{level}->{should},
250 wakaba 1.7 node => $attr);
251     if ($node->has_attribute_ns ($RDF_URI, $attr_xuri)) {
252 wakaba 1.9 $self->{onerror}->(type => 'duplicate unqualified attr',
253     level => $self->{level}->{rdf_fact},
254 wakaba 1.7 node => $attr);
255     ## NOTE: <? rdfa:bout="" about=""> and such are not catched
256     ## by this check; but who cares? rdfa:bout="" is itself illegal.
257     }
258     $attr_xuri = $RDF_URI . $attr_xuri;
259     } else {
260 wakaba 1.9 $self->{onerror}->(type => 'unqualified attr',
261     level => $self->{level}->{rdf_fact},
262 wakaba 1.7 node => $attr);
263     ## TODO: RDF Validator?
264     }
265    
266     return $attr_xuri;
267     }; # $check_local_attr
268 wakaba 1.5
269 wakaba 1.7 sub convert_node_element ($$;%) {
270     my ($self, $node, %opt) = @_;
271     $opt{language} = '' unless defined $opt{language};
272     ## ISSUE: Not explicitly defined in the spec.
273 wakaba 1.1
274     $check_rdf_namespace->($self, $node);
275    
276     # |nodeElement|
277    
278     my $xuri = $node->manakai_expanded_uri;
279    
280     if ({
281     %coreSyntaxTerms,
282     $RDF_URI . 'li' => 1,
283     %oldTerms,
284     }->{$xuri}) {
285     $self->{onerror}->(type => 'element not allowed',
286 wakaba 1.9 level => $self->{level}->{rdf_grammer},
287 wakaba 1.1 node => $node);
288    
289     ## TODO: W3C RDF Validator: Continue validation, but triples that would
290     ## be generated from the subtree are ignored.
291     }
292    
293     my $subject;
294 wakaba 1.4 my $type_attr;
295 wakaba 1.1 my @prop_attr;
296    
297     for my $attr (@{$node->attributes}) {
298 wakaba 1.7 my $nsuri = $attr->namespace_uri;
299     if (defined $nsuri and
300     $nsuri eq q<http://www.w3.org/XML/1998/namespace> and
301     $attr->manakai_local_name eq 'lang') {
302     $opt{language} = $attr->value;
303     }
304    
305 wakaba 1.1 my $prefix = $attr->prefix;
306     if (defined $prefix) {
307     next if $prefix =~ /^[Xx][Mm][Ll]/;
308     } else {
309     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
310     }
311    
312     $check_rdf_namespace->($self, $attr);
313    
314     my $attr_xuri = $attr->manakai_expanded_uri;
315 wakaba 1.7
316     unless (defined $nsuri) {
317     $attr_xuri = $check_local_attr->($self, $node, $attr, $attr_xuri);
318     }
319    
320 wakaba 1.2 if ($attr_xuri eq $RDF_URI . 'ID') {
321 wakaba 1.1 unless (defined $subject) {
322 wakaba 1.6 $subject = {uri => $id_attr->($self, $attr)};
323 wakaba 1.1 } else {
324 wakaba 1.5 $self->{onerror}->(type => 'attribute not allowed',
325 wakaba 1.9 level => $self->{level}->{rdf_grammer},
326 wakaba 1.5 node => $attr);
327    
328 wakaba 1.1 ## TODO: Ignore triple as W3C RDF Validator does
329     }
330     } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
331     unless (defined $subject) {
332 wakaba 1.5 my $id = $attr->value;
333     unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
334 wakaba 1.9 $self->{onerror}->(type => 'XML 1.0 NCName:syntax error',
335     level => $self->{level}->{rdf_grammer},
336 wakaba 1.5 node => $self);
337     }
338    
339     $subject = {bnodeid => $get_bnodeid->($id)};
340 wakaba 1.1 } else {
341 wakaba 1.5 $self->{onerror}->(type => 'attribute not allowed',
342 wakaba 1.9 level => $self->{level}->{rdf_grammer},
343 wakaba 1.5 node => $attr);
344    
345 wakaba 1.1 ## TODO: Ignore triple as W3C RDF Validator does
346     }
347     } elsif ($attr_xuri eq $RDF_URI . 'about') {
348     unless (defined $subject) {
349 wakaba 1.5 $subject = {uri => $uri_attr->($self, $attr)};
350 wakaba 1.1 } else {
351 wakaba 1.5 $self->{onerror}->(type => 'attribute not allowed',
352 wakaba 1.9 level => $self->{level}->{rdf_grammer},
353 wakaba 1.5 node => $attr);
354    
355 wakaba 1.1 ## TODO: Ignore triple as W3C RDF Validator does
356     }
357     } elsif ($attr_xuri eq $RDF_URI . 'type') {
358 wakaba 1.4 $type_attr = $attr;
359 wakaba 1.1 } elsif ({
360     %coreSyntaxTerms,
361     $RDF_URI . 'li' => 1,
362     $RDF_URI . 'Description' => 1,
363     %oldTerms,
364     }->{$attr_xuri}) {
365     $self->{onerror}->(type => 'attribute not allowed',
366 wakaba 1.9 level => $self->{level}->{rdf_grammer},
367 wakaba 1.1 node => $attr);
368    
369     ## TODO: W3C RDF Validator: Ignore triples
370     } else {
371     push @prop_attr, $attr;
372     }
373     }
374    
375     unless (defined $subject) {
376 wakaba 1.4 $subject = {bnodeid => $generate_bnodeid->($self)};
377 wakaba 1.1 }
378    
379     if ($xuri ne $RDF_URI . 'Description') {
380     $self->{ontriple}->(subject => $subject,
381     predicate => {uri => $RDF_URI . 'type'},
382 wakaba 1.2 object => {uri => $xuri},
383     node => $node);
384 wakaba 1.1 }
385    
386 wakaba 1.4 if ($type_attr) {
387 wakaba 1.1 $self->{ontriple}->(subject => $subject,
388     predicate => {uri => $RDF_URI . 'type'},
389 wakaba 1.4 object => {uri => $resolve->($type_attr->value,
390     $type_attr)},
391     node => $type_attr);
392 wakaba 1.1 }
393    
394     for my $attr (@prop_attr) {
395     $self->{ontriple}->(subject => $subject,
396     predicate => {uri => $attr->manakai_expanded_uri},
397 wakaba 1.7 object => {value => $attr->value,
398     language => $opt{language}},
399 wakaba 1.2 node => $attr);
400 wakaba 1.1 ## TODO: SHOULD in NFC
401     }
402    
403     # |propertyEltList|
404    
405     my $li_counter = 1;
406     for my $cn (@{$node->child_nodes}) {
407     my $cn_type = $cn->node_type;
408     if ($cn_type == $cn->ELEMENT_NODE) {
409     $self->convert_property_element ($cn, li_counter => \$li_counter,
410 wakaba 1.7 subject => $subject,
411     language => $opt{language});
412 wakaba 1.1 } elsif ($cn_type == $cn->TEXT_NODE or
413     $cn_type == $cn->CDATA_SECTION_NODE) {
414     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
415     $self->{onerror}->(type => 'character not allowed',
416 wakaba 1.9 level => $self->{level}->{rdf_grammer},
417 wakaba 1.1 node => $cn);
418     }
419     }
420     }
421    
422     return $subject;
423     } # convert_node_element
424    
425 wakaba 1.4 my $get_id_resource = sub {
426 wakaba 1.5 my $self = shift;
427     my $node = shift;
428 wakaba 1.6
429 wakaba 1.5 return undef unless $node;
430 wakaba 1.6
431     return {uri => $id_attr->($self, $node)};
432 wakaba 1.4 }; # $get_id_resource
433    
434 wakaba 1.1 sub convert_property_element ($$%) {
435     my ($self, $node, %opt) = @_;
436    
437     $check_rdf_namespace->($self, $node);
438    
439     # |propertyElt|
440    
441     my $xuri = $node->manakai_expanded_uri;
442     if ($xuri eq $RDF_URI . 'li') {
443     $xuri = $RDF_URI . '_' . ${$opt{li_counter}}++;
444     }
445    
446     if ({
447     %coreSyntaxTerms,
448     $RDF_URI . 'Description' => 1,
449     %oldTerms,
450     }->{$xuri}) {
451     $self->{onerror}->(type => 'element not allowed',
452 wakaba 1.9 level => $self->{level}->{rdf_grammer},
453 wakaba 1.1 node => $node);
454     ## TODO: RDF Validator?
455     }
456    
457 wakaba 1.6 my $rdf_id_attr;
458 wakaba 1.1 my $dt_attr;
459     my $parse_attr;
460     my $nodeid_attr;
461     my $resource_attr;
462     my @prop_attr;
463     for my $attr (@{$node->attributes}) {
464 wakaba 1.7 my $nsuri = $attr->namespace_uri;
465     if (defined $nsuri and
466     $nsuri eq q<http://www.w3.org/XML/1998/namespace> and
467     $attr->manakai_local_name eq 'lang') {
468     $opt{language} = $attr->value;
469     }
470    
471 wakaba 1.1 my $prefix = $attr->prefix;
472     if (defined $prefix) {
473     next if $prefix =~ /^[Xx][Mm][Ll]/;
474     } else {
475     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
476     }
477    
478     $check_rdf_namespace->($self, $attr);
479    
480     my $attr_xuri = $attr->manakai_expanded_uri;
481 wakaba 1.7
482     unless (defined $nsuri) {
483     $attr_xuri = $check_local_attr->($self, $node, $attr, $attr_xuri);
484     }
485    
486 wakaba 1.1 if ($attr_xuri eq $RDF_URI . 'ID') {
487 wakaba 1.6 $rdf_id_attr = $attr;
488 wakaba 1.1 } elsif ($attr_xuri eq $RDF_URI . 'datatype') {
489     $dt_attr = $attr;
490     } elsif ($attr_xuri eq $RDF_URI . 'parseType') {
491     $parse_attr = $attr;
492     } elsif ($attr_xuri eq $RDF_URI . 'resource') {
493     $resource_attr = $attr;
494     } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
495     $nodeid_attr = $attr;
496     } elsif ({
497     %coreSyntaxTerms,
498     $RDF_URI . 'li' => 1,
499     $RDF_URI . 'Description' => 1,
500     %oldTerms,
501     }->{$attr_xuri}) {
502     $self->{onerror}->(type => 'attribute not allowed',
503 wakaba 1.9 level => $self->{level}->{rdf_grammer},
504 wakaba 1.1 node => $attr);
505     ## TODO: RDF Validator?
506     } else {
507     push @prop_attr, $attr;
508     }
509     }
510    
511     my $parse = $parse_attr ? $parse_attr->value : '';
512     if ($parse eq 'Resource') {
513     # |parseTypeResourcePropertyElt|
514    
515     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
516 wakaba 1.2 next unless $attr;
517 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
518 wakaba 1.9 level => $self->{level}->{rdf_grammer},
519 wakaba 1.1 node => $attr);
520     ## TODO: RDF Validator?
521     }
522    
523 wakaba 1.4 my $object = {bnodeid => $generate_bnodeid->($self)};
524 wakaba 1.1 $self->{ontriple}->(subject => $opt{subject},
525     predicate => {uri => $xuri},
526 wakaba 1.2 object => $object,
527 wakaba 1.3 node => $node,
528 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
529 wakaba 1.1
530     ## As if nodeElement
531    
532     # |propertyEltList|
533    
534     my $li_counter = 1;
535     for my $cn (@{$node->child_nodes}) {
536     my $cn_type = $cn->node_type;
537     if ($cn_type == $cn->ELEMENT_NODE) {
538     $self->convert_property_element ($cn, li_counter => \$li_counter,
539 wakaba 1.7 subject => $object,
540     language => $opt{language});
541 wakaba 1.1 } elsif ($cn_type == $cn->TEXT_NODE or
542     $cn_type == $cn->CDATA_SECTION_NODE) {
543     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
544     $self->{onerror}->(type => 'character not allowed',
545 wakaba 1.9 level => $self->{level}->{rdf_grammer},
546 wakaba 1.1 node => $cn);
547     }
548     }
549     }
550     } elsif ($parse eq 'Collection') {
551     # |parseTypeCollectionPropertyElt|
552    
553     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
554 wakaba 1.2 next unless $attr;
555 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
556 wakaba 1.9 level => $self->{level}->{rdf_grammer},
557 wakaba 1.1 node => $attr);
558     ## TODO: RDF Validator?
559     }
560    
561     # |nodeElementList|
562     my @resource;
563     for my $cn (@{$node->child_nodes}) {
564     if ($cn->node_type == $cn->ELEMENT_NODE) {
565     push @resource, [$self->convert_node_element ($cn),
566 wakaba 1.4 {bnodeid => $generate_bnodeid->($self)},
567 wakaba 1.2 $cn];
568 wakaba 1.1 } elsif ($cn->node_type == $cn->TEXT_NODE or
569     $cn->node_type == $cn->CDATA_SECTION_NODE) {
570     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
571     $self->{onerror}->(type => 'character not allowed',
572 wakaba 1.9 level => $self->{level}->{rdf_grammer},
573 wakaba 1.1 node => $cn);
574     }
575     }
576     }
577    
578     if (@resource) {
579     $self->{ontriple}->(subject => $opt{subject},
580     predicate => {uri => $xuri},
581 wakaba 1.2 object => $resource[0]->[1],
582     node => $node);
583 wakaba 1.1 } else {
584     $self->{ontriple}->(subject => $opt{subject},
585     predicate => {uri => $xuri},
586 wakaba 1.2 object => {uri => $RDF_URI . 'nil'},
587 wakaba 1.3 node => $node,
588 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
589 wakaba 1.1 }
590 wakaba 1.3
591 wakaba 1.1 while (@resource) {
592     my $resource = shift @resource;
593     $self->{ontriple}->(subject => $resource->[1],
594     predicate => {uri => $RDF_URI . 'first'},
595 wakaba 1.2 object => $resource->[0],
596     node => $resource->[2]);
597 wakaba 1.1 if (@resource) {
598     $self->{ontriple}->(subject => $resource->[1],
599     predicate => {uri => $RDF_URI . 'rest'},
600 wakaba 1.2 object => $resource[0]->[1],
601     node => $resource->[2]);
602 wakaba 1.1 } else {
603     $self->{ontriple}->(subject => $resource->[1],
604     predicate => {uri => $RDF_URI . 'rest'},
605 wakaba 1.2 object => {uri => $RDF_URI . 'nil'},
606     node => $resource->[2]);
607 wakaba 1.1 }
608     }
609     } elsif ($parse_attr) {
610     # |parseTypeLiteralPropertyElt|
611 wakaba 1.5
612     if ($parse ne 'Literal') {
613     # |parseTypeOtherPropertyElt| ## TODO: What RDF Validator does?
614    
615     $self->{onerror}->(type => 'parse type other',
616 wakaba 1.9 level => $self->{level}->{rdf_info},
617 wakaba 1.5 node => $parse_attr);
618     }
619 wakaba 1.1
620     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
621 wakaba 1.2 next unless $attr;
622 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
623 wakaba 1.9 level => $self->{level}->{rdf_grammer},
624 wakaba 1.1 node => $attr);
625     ## TODO: RDF Validator?
626     }
627    
628     my $value = [@{$node->child_nodes}];
629     ## TODO: Callback for validation
630     ## TODO: Serialized form SHOULD be in NFC.
631    
632     $self->{ontriple}->(subject => $opt{subject},
633     predicate => {uri => $xuri},
634     object => {nodes => $value,
635 wakaba 1.2 datatype => $RDF_URI . 'XMLLiteral'},
636 wakaba 1.3 node => $node,
637 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
638 wakaba 1.1 } else {
639     my $mode = 'unknown';
640    
641     if ($dt_attr) {
642     $mode = 'literal'; # |literalPropertyElt|
643     ## TODO: What RDF Validator does for |< rdf:datatype><el/></>|?
644     }
645     ## TODO: What RDF Validator does for |< prop-attr><non-empty/></>|?
646    
647     my $node_element;
648     my $text = '';
649     for my $cn (@{$node->child_nodes}) {
650     my $cn_type = $cn->node_type;
651     if ($cn_type == $cn->ELEMENT_NODE) {
652     unless ($node_element) {
653     $node_element = $cn;
654     if ({
655     resource => 1, unknown => 1, 'literal-or-resource' => 1,
656     }->{$mode}) {
657     $mode = 'resource';
658     } else {
659     $self->{onerror}->(type => 'element not allowed',
660 wakaba 1.9 level => $self->{level}->{rdf_grammer},
661 wakaba 1.1 node => $cn);
662     ## TODO: RDF Validator?
663     }
664     } else {
665     ## TODO: What RDF Validator does?
666     $self->{onerror}->(type => 'second node element',
667 wakaba 1.9 level => $self->{level}->{rdf_grammer},
668 wakaba 1.1 node => $cn);
669     }
670     } elsif ($cn_type == $cn->TEXT_NODE or
671     $cn_type == $cn->CDATA_SECTION_NODE) {
672     my $data = $cn->data;
673     $text .= $data;
674     if ($data =~ /[^\x09\x0A\x0D\x20]/) {
675     if ({
676     literal => 1, unknown => 1, 'literal-or-resource' => 1,
677     }->{$mode}) {
678     $mode = 'literal';
679     } else {
680     $self->{onerror}->(type => 'character not allowed',
681 wakaba 1.9 level => $self->{level}->{rdf_grammer},
682 wakaba 1.1 node => $cn);
683     ## TODO: RDF Validator?
684     }
685     } else {
686     if ($mode eq 'unknown') {
687     $mode = 'literal-or-resource';
688     } else {
689     #
690     }
691     }
692     }
693     }
694    
695     if ($mode eq 'resource') {
696     # |resourcePropertyElt|
697    
698     for my $attr (@prop_attr, $resource_attr, $nodeid_attr, $dt_attr) {
699 wakaba 1.2 next unless $attr;
700 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
701 wakaba 1.9 level => $self->{level}->{rdf_grammer},
702 wakaba 1.1 node => $attr);
703     ## TODO: RDF Validator?
704     }
705    
706 wakaba 1.7 my $object = $self->convert_node_element ($node_element,
707     language => $opt{language});
708 wakaba 1.1
709     $self->{ontriple}->(subject => $opt{subject},
710     predicate => {uri => $xuri},
711 wakaba 1.2 object => $object,
712 wakaba 1.3 node => $node,
713 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
714 wakaba 1.1 } elsif ($mode eq 'literal' or $mode eq 'literal-or-resource') {
715     # |literalPropertyElt|
716    
717     for my $attr (@prop_attr, $resource_attr, $nodeid_attr) {
718 wakaba 1.2 next unless $attr;
719 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
720 wakaba 1.9 level => $self->{level}->{rdf_grammer},
721 wakaba 1.1 node => $attr);
722     ## TODO: RDF Validator?
723     }
724    
725     ## TODO: $text SHOULD be in NFC
726    
727     if ($dt_attr) {
728 wakaba 1.5 $self->{ontriple}
729     ->(subject => $opt{subject},
730     predicate => {uri => $xuri},
731     object => {value => $text,
732     datatype => $uri_attr->$self, ($dt_attr->value)},
733     ## ISSUE: No resolve() in the spec (but spec says that
734     ## xml:base is applied also to rdf:datatype).
735     node => $node,
736 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
737 wakaba 1.1 } else {
738     $self->{ontriple}->(subject => $opt{subject},
739     predicate => {uri => $xuri},
740     object => {value => $text,
741 wakaba 1.7 language => $opt{language}},
742 wakaba 1.3 node => $node,
743 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
744 wakaba 1.1 }
745     } else {
746     ## |emptyPropertyElt|
747    
748     for my $attr ($dt_attr) {
749 wakaba 1.2 next unless $attr;
750 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
751 wakaba 1.9 level => $self->{level}->{rdf_grammer},
752 wakaba 1.1 node => $attr);
753     ## TODO: RDF Validator?
754     }
755    
756     if (not $resource_attr and not $nodeid_attr and not @prop_attr) {
757     $self->{ontriple}->(subject => $opt{subject},
758     predicate => {uri => $xuri},
759     object => {value => '',
760 wakaba 1.7 language => $opt{language}},
761 wakaba 1.3 node => $node,
762 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
763 wakaba 1.1 } else {
764     my $object;
765     if ($resource_attr) {
766 wakaba 1.5 $object = {uri => $uri_attr->($self, $resource_attr)};
767     if (defined $nodeid_attr) {
768     $self->{onerror}->(type => 'attribute not allowed',
769 wakaba 1.9 level => $self->{level}->{rdf_grammer},
770 wakaba 1.5 node => $nodeid_attr);
771     ## TODO: RDF Validator?
772     }
773 wakaba 1.1 } elsif ($nodeid_attr) {
774 wakaba 1.5 my $id = $nodeid_attr->value;
775     unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
776 wakaba 1.9 $self->{onerror}->(type => 'XML 1.0 NCName:syntax error',
777     level => $self->{level}->{rdf_grammer},
778 wakaba 1.5 node => $self);
779     }
780     $object = {bnodeid => $get_bnodeid->($id)};
781 wakaba 1.1 } else {
782 wakaba 1.4 $object = {bnodeid => $generate_bnodeid->($self)};
783 wakaba 1.1 }
784    
785     for my $attr (@prop_attr) {
786     my $attr_xuri = $attr->manakai_expanded_uri;
787     if ($attr_xuri eq $RDF_URI . 'type') {
788     $self->{ontriple}->(subject => $object,
789     predicate => {uri => $attr_xuri},
790 wakaba 1.4 object => $resolve->($attr->value, $attr),
791 wakaba 1.2 node => $attr);
792 wakaba 1.1 } else {
793     ## TODO: SHOULD be in NFC
794     $self->{ontriple}->(subject => $object,
795     predicate => {uri => $attr_xuri},
796     object => {value => $attr->value,
797 wakaba 1.7 language => $opt{language}},
798 wakaba 1.2 node => $attr);
799 wakaba 1.1 }
800     }
801    
802     $self->{ontriple}->(subject => $opt{subject},
803     predicate => {uri => $xuri},
804 wakaba 1.2 object => $object,
805 wakaba 1.3 node => $node,
806 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
807 wakaba 1.1 }
808     }
809     }
810     } # convert_property_element
811    
812     1;
813    

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24