/[suikacvs]/markup/html/whatpm/Whatpm/RDFXML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/RDFXML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (hide annotations) (download)
Fri Aug 15 16:02:02 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.9: +8 -2 lines
++ whatpm/Whatpm/ChangeLog	15 Aug 2008 16:01:09 -0000
2008-08-16  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm, RDFXML.pm: Pass {level} object to language tag
	and URL checkers.  Support for more error levels for bogus
	langauge tag and URL "standards".

	* LangTag.pm, URIChecker.pm: Support for new style error
	level reporting.

++ whatpm/Whatpm/ContentChecker/ChangeLog	15 Aug 2008 16:01:45 -0000
2008-08-16  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm, Atom.pm: Pass {level} object to language tag
	and URL checkers.

1 wakaba 1.1 package Whatpm::RDFXML;
2     use strict;
3    
4     ## NOTE: EntityReference nodes are not supported except that
5     ## unexpected entity reference are simply ignored. (In fact
6     ## all EntityRefernce nodes are ignored.)
7    
8 wakaba 1.2 ## TODO: Add a callback function invoked for every element
9     ## when XMLCC is implemented in WDCC.
10    
11     ## ISSUE: <html:nest/> in RDF subtree?
12    
13     ## ISSUE: PIs in RDF subtree should be validated?
14    
15 wakaba 1.5 ## TODO: Should we validate expanded URI created from QName?
16    
17     ## TODO: elements in null namespace (not mentioned in the spec.)
18    
19 wakaba 1.1 my $RDF_URI = q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>;
20    
21 wakaba 1.8 use Char::Class::XML qw(InXML_NCNameStartChar10 InXMLNCNameChar10);
22 wakaba 1.5 require Whatpm::URIChecker;
23    
24 wakaba 1.1 sub new ($) {
25 wakaba 1.9 my $self = bless {
26     level => {
27     rdf_fact => 'm',
28     rdf_grammer => 'm',
29     rdf_lc_must => 'm',
30     info => 'i',
31     },
32     next_id => 0,
33     }, shift;
34 wakaba 1.1 $self->{onerror} = sub {
35     my %opt = @_;
36     warn $opt{type}, "\n";
37     };
38     $self->{ontriple} = sub {
39     my %opt = @_;
40     my $dump_resource = sub {
41     my $resource = shift;
42 wakaba 1.3 if (defined $resource->{uri}) {
43 wakaba 1.1 return '<' . $resource->{uri} . '>';
44 wakaba 1.3 } elsif (defined $resource->{bnodeid}) {
45 wakaba 1.1 return '_:' . $resource->{bnodeid};
46     } elsif ($resource->{nodes}) {
47     return '"' . join ('', map {$_->inner_html} @{$resource->{nodes}}) .
48     '"^^<' . $resource->{datatype} . '>';
49     } elsif (defined $resource->{value}) {
50     return '"' . $resource->{value} . '"' .
51     (defined $resource->{datatype}
52     ? '^^<' . $resource->{datatype} . '>'
53     : '@' . $resource->{language});
54     } else {
55     return '??';
56     }
57     };
58     print STDERR $dump_resource->($opt{subject}) . ' ';
59     print STDERR $dump_resource->($opt{predicate}) . ' ';
60     print STDERR $dump_resource->($opt{object}) . "\n";
61 wakaba 1.3 if ($dump_resource->{id}) {
62     print STDERR $dump_resource->($dump_resource->{id}) . ' ';
63     print STDERR $dump_resource->({uri => $RDF_URI . 'subject'}) . ' ';
64     print STDERR $dump_resource->($opt{subject}) . "\n";
65     print STDERR $dump_resource->($dump_resource->{id}) . ' ';
66     print STDERR $dump_resource->({uri => $RDF_URI . 'predicate'}) . ' ';
67     print STDERR $dump_resource->($opt{predicate}) . "\n";
68     print STDERR $dump_resource->($dump_resource->{id}) . ' ';
69     print STDERR $dump_resource->({uri => $RDF_URI . 'object'}) . ' ';
70     print STDERR $dump_resource->($opt{object}) . "\n";
71     print STDERR $dump_resource->($dump_resource->{id}) . ' ';
72     print STDERR $dump_resource->({uri => $RDF_URI . 'type'}) . ' ';
73     print STDERR $dump_resource->({uri => $RDF_URI . 'Statement'}) . "\n";
74     }
75 wakaba 1.1 };
76     return $self;
77     } # new
78    
79     sub convert_document ($$) {
80     my $self = shift;
81     my $node = shift; # Document
82    
83     ## ISSUE: An RDF/XML document, either |doc| or |nodeElement|
84     ## is allowed as a starting production. However, |nodeElement|
85     ## is not a Root Event.
86    
87     my $has_element;
88    
89     for my $cn (@{$node->child_nodes}) {
90     if ($cn->node_type == $cn->ELEMENT_NODE) {
91     unless ($has_element) {
92     if ($cn->manakai_expanded_uri eq $RDF_URI . q<RDF>) {
93 wakaba 1.7 $self->convert_rdf_element ($cn, language => '');
94 wakaba 1.1 } else {
95 wakaba 1.7 $self->convert_rdf_node_element ($cn, language => '');
96 wakaba 1.1 }
97     $has_element = 1;
98     } else {
99     $self->{onerror}->(type => 'second node element',
100 wakaba 1.9 level => $self->{level}->{rdf_grammer},
101 wakaba 1.1 node => $cn);
102     }
103     } elsif ($cn->node_type == $cn->TEXT_NODE or
104     $cn->node_type == $cn->CDATA_SECTION_NODE) {
105     $self->{onerror}->(type => 'character not allowed',
106 wakaba 1.9 level => $self->{level}->{rdf_grammer},
107 wakaba 1.1 node => $cn);
108     }
109     }
110     } # convert_document
111    
112     my $check_rdf_namespace = sub {
113     my $self = shift;
114     my $node = shift;
115     my $node_nsuri = $node->namespace_uri;
116     return unless defined $node_nsuri;
117     if (substr ($node_nsuri, 0, length $RDF_URI) eq $RDF_URI and
118     length $RDF_URI < length $node_nsuri) {
119     $self->{onerror}->(type => 'bad rdf namespace',
120 wakaba 1.9 level => $self->{level}->{rdf_fact}, # Section 5.1
121 wakaba 1.1 node => $node);
122     }
123     }; # $check_rdf_namespace
124    
125 wakaba 1.7 sub convert_rdf_element ($$%) {
126     my ($self, $node, %opt) = @_;
127     $opt{language} = '' unless defined $opt{language};
128     ## ISSUE: Not explicitly defined in the spec.
129 wakaba 1.1
130     $check_rdf_namespace->($self, $node);
131    
132     # |RDF|
133    
134     for my $attr (@{$node->attributes}) {
135 wakaba 1.7 my $nsuri = $attr->namespace_uri;
136     if (defined $nsuri and
137     $nsuri eq q<http://www.w3.org/XML/1998/namespace> and
138     $attr->manakai_local_name eq 'lang') {
139     $opt{language} = $attr->value;
140     next;
141     }
142    
143 wakaba 1.1 my $prefix = $attr->prefix;
144     if (defined $prefix) {
145     next if $prefix =~ /^[Xx][Mm][Ll]/;
146     } else {
147     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
148 wakaba 1.10 ## TODO: "ignored" warning...
149 wakaba 1.1 }
150    
151     $check_rdf_namespace->($self, $attr);
152     $self->{onerror}->(type => 'attribute not allowed',
153 wakaba 1.9 level => $self->{level}->{rdf_grammer},
154 wakaba 1.1 node => $attr);
155     }
156    
157     # |nodeElementList|
158     for my $cn (@{$node->child_nodes}) {
159     if ($cn->node_type == $cn->ELEMENT_NODE) {
160 wakaba 1.7 $self->convert_node_element ($cn, language => $opt{language});
161 wakaba 1.1 } elsif ($cn->node_type == $cn->TEXT_NODE or
162     $cn->node_type == $cn->CDATA_SECTION_NODE) {
163     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
164     $self->{onerror}->(type => 'character not allowed',
165 wakaba 1.9 level => $self->{level}->{rdf_grammer},
166 wakaba 1.1 node => $cn);
167     }
168     }
169     }
170     } # convert_rdf_element
171    
172     my %coreSyntaxTerms = (
173     $RDF_URI . 'RDF' => 1,
174     $RDF_URI . 'ID' => 1,
175     $RDF_URI . 'about' => 1,
176     $RDF_URI . 'parseType' => 1,
177     $RDF_URI . 'resource' => 1,
178     $RDF_URI . 'nodeID' => 1,
179     $RDF_URI . 'datatype' => 1,
180     );
181    
182     my %oldTerms = (
183     $RDF_URI . 'aboutEach' => 1,
184     $RDF_URI . 'aboutEachPrefix' => 1,
185     $RDF_URI . 'bagID' => 1,
186     );
187    
188 wakaba 1.4 require Message::DOM::DOMImplementation;
189     my $resolve = sub {
190     return Message::DOM::DOMImplementation->create_uri_reference ($_[0])
191     ->get_absolute_reference ($_[1]->base_uri)
192     ->uri_reference;
193    
194 wakaba 1.5 ## TODO: Ummm... RDF/XML spec refers dated version of xml:base and RFC 2396...
195    
196 wakaba 1.4 ## TODO: Check latest xml:base and IRI spec...
197     ## (non IRI/URI chars should be percent-encoded before resolve?)
198     }; # $resolve
199    
200     my $generate_bnodeid = sub {
201     return 'g'.$_[0]->{next_id}++;
202     }; # $generate_bnodeid
203    
204     my $get_bnodeid = sub {
205     return 'b'.$_[0];
206     }; # $get_bnodeid
207    
208 wakaba 1.5 my $uri_attr = sub {
209     my ($self, $attr) = @_;
210    
211     my $abs_uri = $resolve->($attr->value, $attr);
212    
213 wakaba 1.10 ## TODO: check as RDF URI reference
214 wakaba 1.5 Whatpm::URIChecker->check_iri_reference ($abs_uri, sub {
215 wakaba 1.9 $self->{onerror}->(@_, node => $attr);
216 wakaba 1.10 }, $self->{level});
217 wakaba 1.5
218     return $abs_uri;
219 wakaba 1.6 }; # $uri_attr
220    
221     my $id_attr = sub {
222     my ($self, $attr) = @_;
223    
224     my $id = $attr->value;
225     unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
226 wakaba 1.9 $self->{onerror}->(type => 'XML 1.0 NCName:syntax error',
227     level => $self->{level}->{rdf_grammer},
228 wakaba 1.6 node => $attr);
229     }
230 wakaba 1.7
231     my $base_uri = $attr->base_uri;
232     if ($self->{id}->{$base_uri}->{$id}) {
233 wakaba 1.9 $self->{onerror}->(type => 'duplicate rdf id',
234     level => $self->{level}->{rdf_lc_must},
235 wakaba 1.7 node => $attr);
236     ## TODO: RDF Validator?
237     } else {
238     $self->{id}->{$base_uri}->{$id} = 1;
239     }
240 wakaba 1.6
241     return $resolve->('#' . $id, $attr);
242 wakaba 1.7 }; # $id_attr
243 wakaba 1.5
244 wakaba 1.7 my $check_local_attr = sub {
245     my ($self, $node, $attr, $attr_xuri) = @_;
246    
247     if ({
248     ID => 1, about => 1, resource => 1, parseType => 1, type => 1,
249     }->{$attr_xuri}) {
250 wakaba 1.9 $self->{onerror}->(type => 'unqualified rdf attr',
251     level => $self->{level}->{should},
252 wakaba 1.7 node => $attr);
253     if ($node->has_attribute_ns ($RDF_URI, $attr_xuri)) {
254 wakaba 1.9 $self->{onerror}->(type => 'duplicate unqualified attr',
255     level => $self->{level}->{rdf_fact},
256 wakaba 1.7 node => $attr);
257     ## NOTE: <? rdfa:bout="" about=""> and such are not catched
258     ## by this check; but who cares? rdfa:bout="" is itself illegal.
259     }
260     $attr_xuri = $RDF_URI . $attr_xuri;
261     } else {
262 wakaba 1.9 $self->{onerror}->(type => 'unqualified attr',
263     level => $self->{level}->{rdf_fact},
264 wakaba 1.7 node => $attr);
265     ## TODO: RDF Validator?
266     }
267    
268     return $attr_xuri;
269     }; # $check_local_attr
270 wakaba 1.5
271 wakaba 1.7 sub convert_node_element ($$;%) {
272     my ($self, $node, %opt) = @_;
273     $opt{language} = '' unless defined $opt{language};
274     ## ISSUE: Not explicitly defined in the spec.
275 wakaba 1.1
276     $check_rdf_namespace->($self, $node);
277    
278     # |nodeElement|
279    
280     my $xuri = $node->manakai_expanded_uri;
281    
282     if ({
283     %coreSyntaxTerms,
284     $RDF_URI . 'li' => 1,
285     %oldTerms,
286     }->{$xuri}) {
287     $self->{onerror}->(type => 'element not allowed',
288 wakaba 1.9 level => $self->{level}->{rdf_grammer},
289 wakaba 1.1 node => $node);
290    
291     ## TODO: W3C RDF Validator: Continue validation, but triples that would
292     ## be generated from the subtree are ignored.
293     }
294    
295     my $subject;
296 wakaba 1.4 my $type_attr;
297 wakaba 1.1 my @prop_attr;
298    
299     for my $attr (@{$node->attributes}) {
300 wakaba 1.7 my $nsuri = $attr->namespace_uri;
301     if (defined $nsuri and
302     $nsuri eq q<http://www.w3.org/XML/1998/namespace> and
303     $attr->manakai_local_name eq 'lang') {
304     $opt{language} = $attr->value;
305     }
306    
307 wakaba 1.1 my $prefix = $attr->prefix;
308     if (defined $prefix) {
309     next if $prefix =~ /^[Xx][Mm][Ll]/;
310     } else {
311     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
312     }
313    
314     $check_rdf_namespace->($self, $attr);
315    
316     my $attr_xuri = $attr->manakai_expanded_uri;
317 wakaba 1.7
318     unless (defined $nsuri) {
319     $attr_xuri = $check_local_attr->($self, $node, $attr, $attr_xuri);
320     }
321    
322 wakaba 1.2 if ($attr_xuri eq $RDF_URI . 'ID') {
323 wakaba 1.1 unless (defined $subject) {
324 wakaba 1.6 $subject = {uri => $id_attr->($self, $attr)};
325 wakaba 1.1 } else {
326 wakaba 1.5 $self->{onerror}->(type => 'attribute not allowed',
327 wakaba 1.9 level => $self->{level}->{rdf_grammer},
328 wakaba 1.5 node => $attr);
329    
330 wakaba 1.1 ## TODO: Ignore triple as W3C RDF Validator does
331     }
332     } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
333     unless (defined $subject) {
334 wakaba 1.5 my $id = $attr->value;
335     unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
336 wakaba 1.9 $self->{onerror}->(type => 'XML 1.0 NCName:syntax error',
337     level => $self->{level}->{rdf_grammer},
338 wakaba 1.5 node => $self);
339     }
340    
341     $subject = {bnodeid => $get_bnodeid->($id)};
342 wakaba 1.1 } else {
343 wakaba 1.5 $self->{onerror}->(type => 'attribute not allowed',
344 wakaba 1.9 level => $self->{level}->{rdf_grammer},
345 wakaba 1.5 node => $attr);
346    
347 wakaba 1.1 ## TODO: Ignore triple as W3C RDF Validator does
348     }
349     } elsif ($attr_xuri eq $RDF_URI . 'about') {
350     unless (defined $subject) {
351 wakaba 1.5 $subject = {uri => $uri_attr->($self, $attr)};
352 wakaba 1.1 } else {
353 wakaba 1.5 $self->{onerror}->(type => 'attribute not allowed',
354 wakaba 1.9 level => $self->{level}->{rdf_grammer},
355 wakaba 1.5 node => $attr);
356    
357 wakaba 1.1 ## TODO: Ignore triple as W3C RDF Validator does
358     }
359     } elsif ($attr_xuri eq $RDF_URI . 'type') {
360 wakaba 1.4 $type_attr = $attr;
361 wakaba 1.1 } elsif ({
362     %coreSyntaxTerms,
363     $RDF_URI . 'li' => 1,
364     $RDF_URI . 'Description' => 1,
365     %oldTerms,
366     }->{$attr_xuri}) {
367     $self->{onerror}->(type => 'attribute not allowed',
368 wakaba 1.9 level => $self->{level}->{rdf_grammer},
369 wakaba 1.1 node => $attr);
370    
371     ## TODO: W3C RDF Validator: Ignore triples
372     } else {
373     push @prop_attr, $attr;
374     }
375     }
376    
377     unless (defined $subject) {
378 wakaba 1.4 $subject = {bnodeid => $generate_bnodeid->($self)};
379 wakaba 1.1 }
380    
381     if ($xuri ne $RDF_URI . 'Description') {
382     $self->{ontriple}->(subject => $subject,
383     predicate => {uri => $RDF_URI . 'type'},
384 wakaba 1.2 object => {uri => $xuri},
385     node => $node);
386 wakaba 1.1 }
387    
388 wakaba 1.4 if ($type_attr) {
389 wakaba 1.1 $self->{ontriple}->(subject => $subject,
390     predicate => {uri => $RDF_URI . 'type'},
391 wakaba 1.4 object => {uri => $resolve->($type_attr->value,
392     $type_attr)},
393     node => $type_attr);
394 wakaba 1.1 }
395    
396     for my $attr (@prop_attr) {
397     $self->{ontriple}->(subject => $subject,
398     predicate => {uri => $attr->manakai_expanded_uri},
399 wakaba 1.7 object => {value => $attr->value,
400     language => $opt{language}},
401 wakaba 1.2 node => $attr);
402 wakaba 1.1 ## TODO: SHOULD in NFC
403     }
404    
405     # |propertyEltList|
406    
407     my $li_counter = 1;
408     for my $cn (@{$node->child_nodes}) {
409     my $cn_type = $cn->node_type;
410     if ($cn_type == $cn->ELEMENT_NODE) {
411     $self->convert_property_element ($cn, li_counter => \$li_counter,
412 wakaba 1.7 subject => $subject,
413     language => $opt{language});
414 wakaba 1.1 } elsif ($cn_type == $cn->TEXT_NODE or
415     $cn_type == $cn->CDATA_SECTION_NODE) {
416     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
417     $self->{onerror}->(type => 'character not allowed',
418 wakaba 1.9 level => $self->{level}->{rdf_grammer},
419 wakaba 1.1 node => $cn);
420     }
421     }
422     }
423    
424     return $subject;
425     } # convert_node_element
426    
427 wakaba 1.4 my $get_id_resource = sub {
428 wakaba 1.5 my $self = shift;
429     my $node = shift;
430 wakaba 1.6
431 wakaba 1.5 return undef unless $node;
432 wakaba 1.6
433     return {uri => $id_attr->($self, $node)};
434 wakaba 1.4 }; # $get_id_resource
435    
436 wakaba 1.1 sub convert_property_element ($$%) {
437     my ($self, $node, %opt) = @_;
438    
439     $check_rdf_namespace->($self, $node);
440    
441     # |propertyElt|
442    
443     my $xuri = $node->manakai_expanded_uri;
444     if ($xuri eq $RDF_URI . 'li') {
445     $xuri = $RDF_URI . '_' . ${$opt{li_counter}}++;
446     }
447    
448     if ({
449     %coreSyntaxTerms,
450     $RDF_URI . 'Description' => 1,
451     %oldTerms,
452     }->{$xuri}) {
453     $self->{onerror}->(type => 'element not allowed',
454 wakaba 1.9 level => $self->{level}->{rdf_grammer},
455 wakaba 1.1 node => $node);
456     ## TODO: RDF Validator?
457     }
458    
459 wakaba 1.6 my $rdf_id_attr;
460 wakaba 1.1 my $dt_attr;
461     my $parse_attr;
462     my $nodeid_attr;
463     my $resource_attr;
464     my @prop_attr;
465     for my $attr (@{$node->attributes}) {
466 wakaba 1.7 my $nsuri = $attr->namespace_uri;
467     if (defined $nsuri and
468     $nsuri eq q<http://www.w3.org/XML/1998/namespace> and
469     $attr->manakai_local_name eq 'lang') {
470     $opt{language} = $attr->value;
471     }
472    
473 wakaba 1.1 my $prefix = $attr->prefix;
474     if (defined $prefix) {
475     next if $prefix =~ /^[Xx][Mm][Ll]/;
476     } else {
477     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
478     }
479    
480     $check_rdf_namespace->($self, $attr);
481    
482     my $attr_xuri = $attr->manakai_expanded_uri;
483 wakaba 1.7
484     unless (defined $nsuri) {
485     $attr_xuri = $check_local_attr->($self, $node, $attr, $attr_xuri);
486     }
487    
488 wakaba 1.1 if ($attr_xuri eq $RDF_URI . 'ID') {
489 wakaba 1.6 $rdf_id_attr = $attr;
490 wakaba 1.1 } elsif ($attr_xuri eq $RDF_URI . 'datatype') {
491     $dt_attr = $attr;
492     } elsif ($attr_xuri eq $RDF_URI . 'parseType') {
493     $parse_attr = $attr;
494     } elsif ($attr_xuri eq $RDF_URI . 'resource') {
495     $resource_attr = $attr;
496     } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
497     $nodeid_attr = $attr;
498     } elsif ({
499     %coreSyntaxTerms,
500     $RDF_URI . 'li' => 1,
501     $RDF_URI . 'Description' => 1,
502     %oldTerms,
503     }->{$attr_xuri}) {
504     $self->{onerror}->(type => 'attribute not allowed',
505 wakaba 1.9 level => $self->{level}->{rdf_grammer},
506 wakaba 1.1 node => $attr);
507     ## TODO: RDF Validator?
508     } else {
509     push @prop_attr, $attr;
510     }
511     }
512    
513     my $parse = $parse_attr ? $parse_attr->value : '';
514     if ($parse eq 'Resource') {
515     # |parseTypeResourcePropertyElt|
516    
517     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
518 wakaba 1.2 next unless $attr;
519 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
520 wakaba 1.9 level => $self->{level}->{rdf_grammer},
521 wakaba 1.1 node => $attr);
522     ## TODO: RDF Validator?
523     }
524    
525 wakaba 1.4 my $object = {bnodeid => $generate_bnodeid->($self)};
526 wakaba 1.1 $self->{ontriple}->(subject => $opt{subject},
527     predicate => {uri => $xuri},
528 wakaba 1.2 object => $object,
529 wakaba 1.3 node => $node,
530 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
531 wakaba 1.1
532     ## As if nodeElement
533    
534     # |propertyEltList|
535    
536     my $li_counter = 1;
537     for my $cn (@{$node->child_nodes}) {
538     my $cn_type = $cn->node_type;
539     if ($cn_type == $cn->ELEMENT_NODE) {
540     $self->convert_property_element ($cn, li_counter => \$li_counter,
541 wakaba 1.7 subject => $object,
542     language => $opt{language});
543 wakaba 1.1 } elsif ($cn_type == $cn->TEXT_NODE or
544     $cn_type == $cn->CDATA_SECTION_NODE) {
545     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
546     $self->{onerror}->(type => 'character not allowed',
547 wakaba 1.9 level => $self->{level}->{rdf_grammer},
548 wakaba 1.1 node => $cn);
549     }
550     }
551     }
552     } elsif ($parse eq 'Collection') {
553     # |parseTypeCollectionPropertyElt|
554    
555     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
556 wakaba 1.2 next unless $attr;
557 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
558 wakaba 1.9 level => $self->{level}->{rdf_grammer},
559 wakaba 1.1 node => $attr);
560     ## TODO: RDF Validator?
561     }
562    
563     # |nodeElementList|
564     my @resource;
565     for my $cn (@{$node->child_nodes}) {
566     if ($cn->node_type == $cn->ELEMENT_NODE) {
567     push @resource, [$self->convert_node_element ($cn),
568 wakaba 1.4 {bnodeid => $generate_bnodeid->($self)},
569 wakaba 1.2 $cn];
570 wakaba 1.1 } elsif ($cn->node_type == $cn->TEXT_NODE or
571     $cn->node_type == $cn->CDATA_SECTION_NODE) {
572     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
573     $self->{onerror}->(type => 'character not allowed',
574 wakaba 1.9 level => $self->{level}->{rdf_grammer},
575 wakaba 1.1 node => $cn);
576     }
577     }
578     }
579    
580     if (@resource) {
581     $self->{ontriple}->(subject => $opt{subject},
582     predicate => {uri => $xuri},
583 wakaba 1.2 object => $resource[0]->[1],
584     node => $node);
585 wakaba 1.1 } else {
586     $self->{ontriple}->(subject => $opt{subject},
587     predicate => {uri => $xuri},
588 wakaba 1.2 object => {uri => $RDF_URI . 'nil'},
589 wakaba 1.3 node => $node,
590 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
591 wakaba 1.1 }
592 wakaba 1.3
593 wakaba 1.1 while (@resource) {
594     my $resource = shift @resource;
595     $self->{ontriple}->(subject => $resource->[1],
596     predicate => {uri => $RDF_URI . 'first'},
597 wakaba 1.2 object => $resource->[0],
598     node => $resource->[2]);
599 wakaba 1.1 if (@resource) {
600     $self->{ontriple}->(subject => $resource->[1],
601     predicate => {uri => $RDF_URI . 'rest'},
602 wakaba 1.2 object => $resource[0]->[1],
603     node => $resource->[2]);
604 wakaba 1.1 } else {
605     $self->{ontriple}->(subject => $resource->[1],
606     predicate => {uri => $RDF_URI . 'rest'},
607 wakaba 1.2 object => {uri => $RDF_URI . 'nil'},
608     node => $resource->[2]);
609 wakaba 1.1 }
610     }
611     } elsif ($parse_attr) {
612     # |parseTypeLiteralPropertyElt|
613 wakaba 1.5
614     if ($parse ne 'Literal') {
615     # |parseTypeOtherPropertyElt| ## TODO: What RDF Validator does?
616    
617     $self->{onerror}->(type => 'parse type other',
618 wakaba 1.9 level => $self->{level}->{rdf_info},
619 wakaba 1.5 node => $parse_attr);
620     }
621 wakaba 1.1
622     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
623 wakaba 1.2 next unless $attr;
624 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
625 wakaba 1.9 level => $self->{level}->{rdf_grammer},
626 wakaba 1.1 node => $attr);
627     ## TODO: RDF Validator?
628     }
629    
630     my $value = [@{$node->child_nodes}];
631     ## TODO: Callback for validation
632     ## TODO: Serialized form SHOULD be in NFC.
633    
634     $self->{ontriple}->(subject => $opt{subject},
635     predicate => {uri => $xuri},
636     object => {nodes => $value,
637 wakaba 1.2 datatype => $RDF_URI . 'XMLLiteral'},
638 wakaba 1.3 node => $node,
639 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
640 wakaba 1.1 } else {
641     my $mode = 'unknown';
642    
643     if ($dt_attr) {
644     $mode = 'literal'; # |literalPropertyElt|
645     ## TODO: What RDF Validator does for |< rdf:datatype><el/></>|?
646     }
647     ## TODO: What RDF Validator does for |< prop-attr><non-empty/></>|?
648    
649     my $node_element;
650     my $text = '';
651     for my $cn (@{$node->child_nodes}) {
652     my $cn_type = $cn->node_type;
653     if ($cn_type == $cn->ELEMENT_NODE) {
654     unless ($node_element) {
655     $node_element = $cn;
656     if ({
657     resource => 1, unknown => 1, 'literal-or-resource' => 1,
658     }->{$mode}) {
659     $mode = 'resource';
660     } else {
661     $self->{onerror}->(type => 'element not allowed',
662 wakaba 1.9 level => $self->{level}->{rdf_grammer},
663 wakaba 1.1 node => $cn);
664     ## TODO: RDF Validator?
665     }
666     } else {
667     ## TODO: What RDF Validator does?
668     $self->{onerror}->(type => 'second node element',
669 wakaba 1.9 level => $self->{level}->{rdf_grammer},
670 wakaba 1.1 node => $cn);
671     }
672     } elsif ($cn_type == $cn->TEXT_NODE or
673     $cn_type == $cn->CDATA_SECTION_NODE) {
674     my $data = $cn->data;
675     $text .= $data;
676     if ($data =~ /[^\x09\x0A\x0D\x20]/) {
677     if ({
678     literal => 1, unknown => 1, 'literal-or-resource' => 1,
679     }->{$mode}) {
680     $mode = 'literal';
681     } else {
682     $self->{onerror}->(type => 'character not allowed',
683 wakaba 1.9 level => $self->{level}->{rdf_grammer},
684 wakaba 1.1 node => $cn);
685     ## TODO: RDF Validator?
686     }
687     } else {
688     if ($mode eq 'unknown') {
689     $mode = 'literal-or-resource';
690     } else {
691     #
692     }
693     }
694     }
695     }
696    
697     if ($mode eq 'resource') {
698     # |resourcePropertyElt|
699    
700     for my $attr (@prop_attr, $resource_attr, $nodeid_attr, $dt_attr) {
701 wakaba 1.2 next unless $attr;
702 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
703 wakaba 1.9 level => $self->{level}->{rdf_grammer},
704 wakaba 1.1 node => $attr);
705     ## TODO: RDF Validator?
706     }
707    
708 wakaba 1.7 my $object = $self->convert_node_element ($node_element,
709     language => $opt{language});
710 wakaba 1.1
711     $self->{ontriple}->(subject => $opt{subject},
712     predicate => {uri => $xuri},
713 wakaba 1.2 object => $object,
714 wakaba 1.3 node => $node,
715 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
716 wakaba 1.1 } elsif ($mode eq 'literal' or $mode eq 'literal-or-resource') {
717     # |literalPropertyElt|
718    
719     for my $attr (@prop_attr, $resource_attr, $nodeid_attr) {
720 wakaba 1.2 next unless $attr;
721 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
722 wakaba 1.9 level => $self->{level}->{rdf_grammer},
723 wakaba 1.1 node => $attr);
724     ## TODO: RDF Validator?
725     }
726    
727     ## TODO: $text SHOULD be in NFC
728    
729     if ($dt_attr) {
730 wakaba 1.5 $self->{ontriple}
731     ->(subject => $opt{subject},
732     predicate => {uri => $xuri},
733     object => {value => $text,
734 wakaba 1.10 datatype => $uri_attr->($self, $dt_attr)},
735 wakaba 1.5 ## ISSUE: No resolve() in the spec (but spec says that
736     ## xml:base is applied also to rdf:datatype).
737     node => $node,
738 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
739 wakaba 1.1 } else {
740     $self->{ontriple}->(subject => $opt{subject},
741     predicate => {uri => $xuri},
742     object => {value => $text,
743 wakaba 1.7 language => $opt{language}},
744 wakaba 1.3 node => $node,
745 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
746 wakaba 1.1 }
747     } else {
748     ## |emptyPropertyElt|
749    
750     for my $attr ($dt_attr) {
751 wakaba 1.2 next unless $attr;
752 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
753 wakaba 1.9 level => $self->{level}->{rdf_grammer},
754 wakaba 1.1 node => $attr);
755     ## TODO: RDF Validator?
756     }
757    
758     if (not $resource_attr and not $nodeid_attr and not @prop_attr) {
759     $self->{ontriple}->(subject => $opt{subject},
760     predicate => {uri => $xuri},
761     object => {value => '',
762 wakaba 1.7 language => $opt{language}},
763 wakaba 1.3 node => $node,
764 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
765 wakaba 1.1 } else {
766     my $object;
767     if ($resource_attr) {
768 wakaba 1.5 $object = {uri => $uri_attr->($self, $resource_attr)};
769     if (defined $nodeid_attr) {
770     $self->{onerror}->(type => 'attribute not allowed',
771 wakaba 1.9 level => $self->{level}->{rdf_grammer},
772 wakaba 1.5 node => $nodeid_attr);
773     ## TODO: RDF Validator?
774     }
775 wakaba 1.1 } elsif ($nodeid_attr) {
776 wakaba 1.5 my $id = $nodeid_attr->value;
777     unless ($id =~ /\A\p{InXML_NCNameStartChar10}\p{InXMLNCNameChar10}*\z/) {
778 wakaba 1.9 $self->{onerror}->(type => 'XML 1.0 NCName:syntax error',
779     level => $self->{level}->{rdf_grammer},
780 wakaba 1.5 node => $self);
781     }
782     $object = {bnodeid => $get_bnodeid->($id)};
783 wakaba 1.1 } else {
784 wakaba 1.4 $object = {bnodeid => $generate_bnodeid->($self)};
785 wakaba 1.1 }
786    
787     for my $attr (@prop_attr) {
788     my $attr_xuri = $attr->manakai_expanded_uri;
789     if ($attr_xuri eq $RDF_URI . 'type') {
790     $self->{ontriple}->(subject => $object,
791     predicate => {uri => $attr_xuri},
792 wakaba 1.4 object => $resolve->($attr->value, $attr),
793 wakaba 1.2 node => $attr);
794 wakaba 1.1 } else {
795     ## TODO: SHOULD be in NFC
796     $self->{ontriple}->(subject => $object,
797     predicate => {uri => $attr_xuri},
798     object => {value => $attr->value,
799 wakaba 1.7 language => $opt{language}},
800 wakaba 1.2 node => $attr);
801 wakaba 1.1 }
802     }
803    
804     $self->{ontriple}->(subject => $opt{subject},
805     predicate => {uri => $xuri},
806 wakaba 1.2 object => $object,
807 wakaba 1.3 node => $node,
808 wakaba 1.6 id => $get_id_resource->($self, $rdf_id_attr));
809 wakaba 1.1 }
810     }
811     }
812     } # convert_property_element
813 wakaba 1.10
814     ## TODO: We need to normalize language tags into lowercase (see RDF abstract
815     ## syntax spec) before invoking callbak. In addition, we need to validate
816     ## them against RFC 3066 (fact-level, referencing abstract syntax spec).
817 wakaba 1.1
818     1;
819    

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24