/[suikacvs]/markup/html/whatpm/Whatpm/RDFXML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/RDFXML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Fri Mar 21 08:58:35 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +50 -19 lines
++ whatpm/Whatpm/ChangeLog	21 Mar 2008 08:58:05 -0000
	* RDFXML.pm: s/id/ID/ for attribute name.
	The |node| arguments are added for |ontriple| calls.
	Too many "attribute not allowed" errors were raised.

	* ContentChecker.pm: Initial experimental support for rdf:RDF
	element.

2008-03-21  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::RDFXML;
2     use strict;
3    
4     ## NOTE: EntityReference nodes are not supported except that
5     ## unexpected entity reference are simply ignored. (In fact
6     ## all EntityRefernce nodes are ignored.)
7    
8 wakaba 1.2 ## TODO: Add a callback function invoked for every element
9     ## when XMLCC is implemented in WDCC.
10    
11     ## ISSUE: <html:nest/> in RDF subtree?
12    
13     ## ISSUE: PIs in RDF subtree should be validated?
14    
15 wakaba 1.1 my $RDF_URI = q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>;
16    
17     sub new ($) {
18     my $self = bless {fact_level => 'm', grammer_level => 'm'}, shift;
19     $self->{onerror} = sub {
20     my %opt = @_;
21     warn $opt{type}, "\n";
22     };
23     $self->{ontriple} = sub {
24     my %opt = @_;
25     my $dump_resource = sub {
26     my $resource = shift;
27     if ($resource->{uri}) {
28     return '<' . $resource->{uri} . '>';
29     } elsif ($resource->{bnodeid}) {
30     return '_:' . $resource->{bnodeid};
31     } elsif ($resource->{nodes}) {
32     return '"' . join ('', map {$_->inner_html} @{$resource->{nodes}}) .
33     '"^^<' . $resource->{datatype} . '>';
34     } elsif (defined $resource->{value}) {
35     return '"' . $resource->{value} . '"' .
36     (defined $resource->{datatype}
37     ? '^^<' . $resource->{datatype} . '>'
38     : '@' . $resource->{language});
39     } else {
40     return '??';
41     }
42     };
43     print STDERR $dump_resource->($opt{subject}) . ' ';
44     print STDERR $dump_resource->($opt{predicate}) . ' ';
45     print STDERR $dump_resource->($opt{object}) . "\n";
46     };
47     return $self;
48     } # new
49    
50     sub convert_document ($$) {
51     my $self = shift;
52     my $node = shift; # Document
53    
54     ## ISSUE: An RDF/XML document, either |doc| or |nodeElement|
55     ## is allowed as a starting production. However, |nodeElement|
56     ## is not a Root Event.
57    
58     my $has_element;
59    
60     for my $cn (@{$node->child_nodes}) {
61     if ($cn->node_type == $cn->ELEMENT_NODE) {
62     unless ($has_element) {
63     if ($cn->manakai_expanded_uri eq $RDF_URI . q<RDF>) {
64     $self->convert_rdf_element ($cn);
65     } else {
66     $self->convert_rdf_node_element ($cn);
67     }
68     $has_element = 1;
69     } else {
70     $self->{onerror}->(type => 'second node element',
71     level => $self->{grammer_level},
72     node => $cn);
73     }
74     } elsif ($cn->node_type == $cn->TEXT_NODE or
75     $cn->node_type == $cn->CDATA_SECTION_NODE) {
76     $self->{onerror}->(type => 'character not allowed',
77     level => $self->{grammer_level},
78     node => $cn);
79     }
80     }
81     } # convert_document
82    
83     my $check_rdf_namespace = sub {
84     my $self = shift;
85     my $node = shift;
86     my $node_nsuri = $node->namespace_uri;
87     return unless defined $node_nsuri;
88     if (substr ($node_nsuri, 0, length $RDF_URI) eq $RDF_URI and
89     length $RDF_URI < length $node_nsuri) {
90     $self->{onerror}->(type => 'bad rdf namespace',
91     level => $self->{fact_level}, # Section 5.1
92     node => $node);
93     }
94     }; # $check_rdf_namespace
95    
96     sub convert_rdf_element ($$) {
97     my ($self, $node) = @_;
98    
99     $check_rdf_namespace->($self, $node);
100    
101     # |RDF|
102    
103     for my $attr (@{$node->attributes}) {
104     my $prefix = $attr->prefix;
105     if (defined $prefix) {
106     next if $prefix =~ /^[Xx][Mm][Ll]/;
107     } else {
108     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
109     }
110    
111     $check_rdf_namespace->($self, $attr);
112     $self->{onerror}->(type => 'attribute not allowed',
113     level => $self->{grammer_level},
114     node => $attr);
115     }
116    
117     # |nodeElementList|
118     for my $cn (@{$node->child_nodes}) {
119     if ($cn->node_type == $cn->ELEMENT_NODE) {
120     $self->convert_node_element ($cn);
121     } elsif ($cn->node_type == $cn->TEXT_NODE or
122     $cn->node_type == $cn->CDATA_SECTION_NODE) {
123     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
124     $self->{onerror}->(type => 'character not allowed',
125     level => $self->{grammer_level},
126     node => $cn);
127     }
128     }
129     }
130     } # convert_rdf_element
131    
132     my %coreSyntaxTerms = (
133     $RDF_URI . 'RDF' => 1,
134     $RDF_URI . 'ID' => 1,
135     $RDF_URI . 'about' => 1,
136     $RDF_URI . 'parseType' => 1,
137     $RDF_URI . 'resource' => 1,
138     $RDF_URI . 'nodeID' => 1,
139     $RDF_URI . 'datatype' => 1,
140     );
141    
142     my %oldTerms = (
143     $RDF_URI . 'aboutEach' => 1,
144     $RDF_URI . 'aboutEachPrefix' => 1,
145     $RDF_URI . 'bagID' => 1,
146     );
147    
148     sub convert_node_element ($$) {
149     my ($self, $node) = @_;
150    
151     $check_rdf_namespace->($self, $node);
152    
153     # |nodeElement|
154    
155     my $xuri = $node->manakai_expanded_uri;
156    
157     if ({
158     %coreSyntaxTerms,
159     $RDF_URI . 'li' => 1,
160     %oldTerms,
161     }->{$xuri}) {
162     $self->{onerror}->(type => 'element not allowed',
163     level => $self->{grammer_level},
164     node => $node);
165    
166     ## TODO: W3C RDF Validator: Continue validation, but triples that would
167     ## be generated from the subtree are ignored.
168     }
169    
170     my $subject;
171     my $rdf_type_attr;
172     my @prop_attr;
173    
174     for my $attr (@{$node->attributes}) {
175     my $prefix = $attr->prefix;
176     if (defined $prefix) {
177     next if $prefix =~ /^[Xx][Mm][Ll]/;
178     } else {
179     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
180     }
181    
182     $check_rdf_namespace->($self, $attr);
183    
184     my $attr_xuri = $attr->manakai_expanded_uri;
185 wakaba 1.2 if ($attr_xuri eq $RDF_URI . 'ID') {
186 wakaba 1.1 unless (defined $subject) {
187     $subject = {uri => '#' . $attr->value}; ## TODO: resolve()
188     } else {
189     ## TODO: Ignore triple as W3C RDF Validator does
190     }
191     } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
192     unless (defined $subject) {
193     $subject = {bnodeid => '## TODO: bnode: ' . $attr->value};
194     } else {
195     ## TODO: Ignore triple as W3C RDF Validator does
196     }
197     } elsif ($attr_xuri eq $RDF_URI . 'about') {
198     unless (defined $subject) {
199     $subject = {uri => $attr->value}; ## TODO: resolve
200     } else {
201     ## TODO: Ignore triple as W3C RDF Validator does
202     }
203     } elsif ($attr_xuri eq $RDF_URI . 'type') {
204     $rdf_type_attr = $attr;
205     } elsif ({
206     %coreSyntaxTerms,
207     $RDF_URI . 'li' => 1,
208     $RDF_URI . 'Description' => 1,
209     %oldTerms,
210     }->{$attr_xuri}) {
211     $self->{onerror}->(type => 'attribute not allowed',
212     level => $self->{grammer_level},
213     node => $attr);
214    
215     ## TODO: W3C RDF Validator: Ignore triples
216     } else {
217     push @prop_attr, $attr;
218     }
219     }
220    
221     unless (defined $subject) {
222     $subject = {bnodeid => '## TODO: new bnodeid'};
223     }
224    
225     if ($xuri ne $RDF_URI . 'Description') {
226     $self->{ontriple}->(subject => $subject,
227     predicate => {uri => $RDF_URI . 'type'},
228 wakaba 1.2 object => {uri => $xuri},
229     node => $node);
230 wakaba 1.1 }
231    
232     if ($rdf_type_attr) {
233     $self->{ontriple}->(subject => $subject,
234     predicate => {uri => $RDF_URI . 'type'},
235 wakaba 1.2 object => {uri => $rdf_type_attr->value}, ## TODO: resolve
236     node => $rdf_type_attr);
237 wakaba 1.1 }
238    
239     for my $attr (@prop_attr) {
240     $self->{ontriple}->(subject => $subject,
241     predicate => {uri => $attr->manakai_expanded_uri},
242 wakaba 1.2 object => {value => $attr->value}, ## TODO: language
243     node => $attr);
244 wakaba 1.1 ## TODO: SHOULD in NFC
245     }
246    
247     # |propertyEltList|
248    
249     my $li_counter = 1;
250     for my $cn (@{$node->child_nodes}) {
251     my $cn_type = $cn->node_type;
252     if ($cn_type == $cn->ELEMENT_NODE) {
253     $self->convert_property_element ($cn, li_counter => \$li_counter,
254     subject => $subject);
255     } elsif ($cn_type == $cn->TEXT_NODE or
256     $cn_type == $cn->CDATA_SECTION_NODE) {
257     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
258     $self->{onerror}->(type => 'character not allowed',
259     level => $self->{grammer_level},
260     node => $cn);
261     }
262     }
263     }
264    
265     return $subject;
266     } # convert_node_element
267    
268     sub convert_property_element ($$%) {
269     my ($self, $node, %opt) = @_;
270    
271     $check_rdf_namespace->($self, $node);
272    
273     # |propertyElt|
274    
275     my $xuri = $node->manakai_expanded_uri;
276     if ($xuri eq $RDF_URI . 'li') {
277     $xuri = $RDF_URI . '_' . ${$opt{li_counter}}++;
278     }
279    
280     if ({
281     %coreSyntaxTerms,
282     $RDF_URI . 'Description' => 1,
283     %oldTerms,
284     }->{$xuri}) {
285     $self->{onerror}->(type => 'element not allowed',
286     level => $self->{grammer_level},
287     node => $node);
288     ## TODO: RDF Validator?
289     }
290    
291     my $id_attr;
292     my $dt_attr;
293     my $parse_attr;
294     my $nodeid_attr;
295     my $resource_attr;
296     my @prop_attr;
297     for my $attr (@{$node->attributes}) {
298     my $prefix = $attr->prefix;
299     if (defined $prefix) {
300     next if $prefix =~ /^[Xx][Mm][Ll]/;
301     } else {
302     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
303     }
304    
305     $check_rdf_namespace->($self, $attr);
306    
307     my $attr_xuri = $attr->manakai_expanded_uri;
308     if ($attr_xuri eq $RDF_URI . 'ID') {
309     $id_attr = $attr;
310     } elsif ($attr_xuri eq $RDF_URI . 'datatype') {
311     $dt_attr = $attr;
312     } elsif ($attr_xuri eq $RDF_URI . 'parseType') {
313     $parse_attr = $attr;
314     } elsif ($attr_xuri eq $RDF_URI . 'resource') {
315     $resource_attr = $attr;
316     } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
317     $nodeid_attr = $attr;
318     } elsif ({
319     %coreSyntaxTerms,
320     $RDF_URI . 'li' => 1,
321     $RDF_URI . 'Description' => 1,
322     %oldTerms,
323     }->{$attr_xuri}) {
324     $self->{onerror}->(type => 'attribute not allowed',
325     level => $self->{grammer_level},
326     node => $attr);
327     ## TODO: RDF Validator?
328     } else {
329     push @prop_attr, $attr;
330     }
331     }
332    
333     my $parse = $parse_attr ? $parse_attr->value : '';
334     if ($parse eq 'Resource') {
335     # |parseTypeResourcePropertyElt|
336    
337     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
338 wakaba 1.2 next unless $attr;
339 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
340     level => $self->{grammer_level},
341     node => $attr);
342     ## TODO: RDF Validator?
343     }
344    
345     my $object = {bnodeid => '## TODO: generate bnodeid'};
346     $self->{ontriple}->(subject => $opt{subject},
347     predicate => {uri => $xuri},
348 wakaba 1.2 object => $object,
349     node => $node);
350 wakaba 1.1 ## TODO: reification
351    
352     ## As if nodeElement
353    
354     # |propertyEltList|
355    
356     my $li_counter = 1;
357     for my $cn (@{$node->child_nodes}) {
358     my $cn_type = $cn->node_type;
359     if ($cn_type == $cn->ELEMENT_NODE) {
360     $self->convert_property_element ($cn, li_counter => \$li_counter,
361     subject => $object);
362     } elsif ($cn_type == $cn->TEXT_NODE or
363     $cn_type == $cn->CDATA_SECTION_NODE) {
364     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
365     $self->{onerror}->(type => 'character not allowed',
366     level => $self->{grammer_level},
367     node => $cn);
368     }
369     }
370     }
371     } elsif ($parse eq 'Collection') {
372     # |parseTypeCollectionPropertyElt|
373    
374     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
375 wakaba 1.2 next unless $attr;
376 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
377     level => $self->{grammer_level},
378     node => $attr);
379     ## TODO: RDF Validator?
380     }
381    
382     # |nodeElementList|
383     my @resource;
384     for my $cn (@{$node->child_nodes}) {
385     if ($cn->node_type == $cn->ELEMENT_NODE) {
386     push @resource, [$self->convert_node_element ($cn),
387 wakaba 1.2 {bnodeid => '## TODO: bnodeid generated'},
388     $cn];
389 wakaba 1.1 } elsif ($cn->node_type == $cn->TEXT_NODE or
390     $cn->node_type == $cn->CDATA_SECTION_NODE) {
391     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
392     $self->{onerror}->(type => 'character not allowed',
393     level => $self->{grammer_level},
394     node => $cn);
395     }
396     }
397     }
398    
399     if (@resource) {
400     $self->{ontriple}->(subject => $opt{subject},
401     predicate => {uri => $xuri},
402 wakaba 1.2 object => $resource[0]->[1],
403     node => $node);
404 wakaba 1.1 } else {
405     $self->{ontriple}->(subject => $opt{subject},
406     predicate => {uri => $xuri},
407 wakaba 1.2 object => {uri => $RDF_URI . 'nil'},
408     node => $node);
409 wakaba 1.1 }
410     ## TODO: reification
411    
412     while (@resource) {
413     my $resource = shift @resource;
414     $self->{ontriple}->(subject => $resource->[1],
415     predicate => {uri => $RDF_URI . 'first'},
416 wakaba 1.2 object => $resource->[0],
417     node => $resource->[2]);
418 wakaba 1.1 if (@resource) {
419     $self->{ontriple}->(subject => $resource->[1],
420     predicate => {uri => $RDF_URI . 'rest'},
421 wakaba 1.2 object => $resource[0]->[1],
422     node => $resource->[2]);
423 wakaba 1.1 } else {
424     $self->{ontriple}->(subject => $resource->[1],
425     predicate => {uri => $RDF_URI . 'rest'},
426 wakaba 1.2 object => {uri => $RDF_URI . 'nil'},
427     node => $resource->[2]);
428 wakaba 1.1 }
429     }
430     } elsif ($parse_attr) {
431     # |parseTypeLiteralPropertyElt|
432     # |parseTypeOtherPropertyElt| ## TODO: What RDF Validator does?
433    
434     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
435 wakaba 1.2 next unless $attr;
436 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
437     level => $self->{grammer_level},
438     node => $attr);
439     ## TODO: RDF Validator?
440     }
441    
442     my $value = [@{$node->child_nodes}];
443     ## TODO: Callback for validation
444     ## TODO: Serialized form SHOULD be in NFC.
445    
446     $self->{ontriple}->(subject => $opt{subject},
447     predicate => {uri => $xuri},
448     object => {nodes => $value,
449 wakaba 1.2 datatype => $RDF_URI . 'XMLLiteral'},
450     node => $node);
451 wakaba 1.1 ## TODO: reification
452     } else {
453     my $mode = 'unknown';
454    
455     if ($dt_attr) {
456     $mode = 'literal'; # |literalPropertyElt|
457     ## TODO: What RDF Validator does for |< rdf:datatype><el/></>|?
458     }
459     ## TODO: What RDF Validator does for |< prop-attr><non-empty/></>|?
460    
461     my $node_element;
462     my $text = '';
463     for my $cn (@{$node->child_nodes}) {
464     my $cn_type = $cn->node_type;
465     if ($cn_type == $cn->ELEMENT_NODE) {
466     unless ($node_element) {
467     $node_element = $cn;
468     if ({
469     resource => 1, unknown => 1, 'literal-or-resource' => 1,
470     }->{$mode}) {
471     $mode = 'resource';
472     } else {
473     $self->{onerror}->(type => 'element not allowed',
474     level => $self->{grammer_level},
475     node => $cn);
476     ## TODO: RDF Validator?
477     }
478     } else {
479     ## TODO: What RDF Validator does?
480     $self->{onerror}->(type => 'second node element',
481     level => $self->{grammer_level},
482     node => $cn);
483     }
484     } elsif ($cn_type == $cn->TEXT_NODE or
485     $cn_type == $cn->CDATA_SECTION_NODE) {
486     my $data = $cn->data;
487     $text .= $data;
488     if ($data =~ /[^\x09\x0A\x0D\x20]/) {
489     if ({
490     literal => 1, unknown => 1, 'literal-or-resource' => 1,
491     }->{$mode}) {
492     $mode = 'literal';
493     } else {
494     $self->{onerror}->(type => 'character not allowed',
495     level => $self->{grammer_level},
496     node => $cn);
497     ## TODO: RDF Validator?
498     }
499     } else {
500     if ($mode eq 'unknown') {
501     $mode = 'literal-or-resource';
502     } else {
503     #
504     }
505     }
506     }
507     }
508    
509     if ($mode eq 'resource') {
510     # |resourcePropertyElt|
511    
512     for my $attr (@prop_attr, $resource_attr, $nodeid_attr, $dt_attr) {
513 wakaba 1.2 next unless $attr;
514 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
515     level => $self->{grammer_level},
516     node => $attr);
517     ## TODO: RDF Validator?
518     }
519    
520     my $object = $self->convert_node_element ($node_element);
521    
522     $self->{ontriple}->(subject => $opt{subject},
523     predicate => {uri => $xuri},
524 wakaba 1.2 object => $object,
525     node => $node);
526 wakaba 1.1
527     ## TODO: reification
528     } elsif ($mode eq 'literal' or $mode eq 'literal-or-resource') {
529     # |literalPropertyElt|
530    
531     for my $attr (@prop_attr, $resource_attr, $nodeid_attr) {
532 wakaba 1.2 next unless $attr;
533 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
534     level => $self->{grammer_level},
535     node => $attr);
536     ## TODO: RDF Validator?
537     }
538    
539     ## TODO: $text SHOULD be in NFC
540    
541     if ($dt_attr) {
542     $self->{ontriple}->(subject => $opt{subject},
543     predicate => {uri => $xuri},
544     object => {value => $text,
545 wakaba 1.2 datatype => $dt_attr->value},
546     node => $node);
547 wakaba 1.1 } else {
548     $self->{ontriple}->(subject => $opt{subject},
549     predicate => {uri => $xuri},
550     object => {value => $text,
551     ## TODO: language
552 wakaba 1.2 },
553     node => $node);
554 wakaba 1.1 }
555    
556     ## TODO: reification
557     } else {
558     ## |emptyPropertyElt|
559    
560     for my $attr ($dt_attr) {
561 wakaba 1.2 next unless $attr;
562 wakaba 1.1 $self->{onerror}->(type => 'attribute not allowed',
563     level => $self->{grammer_level},
564     node => $attr);
565     ## TODO: RDF Validator?
566     }
567    
568     if (not $resource_attr and not $nodeid_attr and not @prop_attr) {
569     $self->{ontriple}->(subject => $opt{subject},
570     predicate => {uri => $xuri},
571     object => {value => '',
572     ## TODO: language
573 wakaba 1.2 },
574     node => $node);
575 wakaba 1.1
576     ## TODO: reification
577     } else {
578     my $object;
579     if ($resource_attr) {
580     $object = {uri => $resource_attr->value}; ## TODO: resolve
581     } elsif ($nodeid_attr) {
582     $object = {bnodeid => $nodeid_attr->value};
583     } else {
584     $object = {bnodeid => '## TODO: generated bnodeid'};
585     }
586    
587     for my $attr (@prop_attr) {
588     my $attr_xuri = $attr->manakai_expanded_uri;
589     if ($attr_xuri eq $RDF_URI . 'type') {
590     $self->{ontriple}->(subject => $object,
591     predicate => {uri => $attr_xuri},
592 wakaba 1.2 object => $attr->value, ## TODO: resolve
593     node => $attr);
594 wakaba 1.1 } else {
595     ## TODO: SHOULD be in NFC
596     $self->{ontriple}->(subject => $object,
597     predicate => {uri => $attr_xuri},
598     object => {value => $attr->value,
599     ## TODO: lang
600 wakaba 1.2 },
601     node => $attr);
602 wakaba 1.1 }
603     }
604    
605     $self->{ontriple}->(subject => $opt{subject},
606     predicate => {uri => $xuri},
607 wakaba 1.2 object => $object,
608     node => $node);
609 wakaba 1.1
610     ## TODO: reification
611     }
612     }
613     }
614     } # convert_property_element
615    
616     1;
617    

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24