/[suikacvs]/markup/html/whatpm/Whatpm/RDFXML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/RDFXML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Fri Mar 21 07:56:48 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
++ whatpm/Whatpm/ChangeLog	21 Mar 2008 07:56:31 -0000
2008-03-21  Wakaba  <wakaba@suika.fam.cx>

	* RDFXML.pm: New module.

1 wakaba 1.1 package Whatpm::RDFXML;
2     use strict;
3    
4     ## NOTE: EntityReference nodes are not supported except that
5     ## unexpected entity reference are simply ignored. (In fact
6     ## all EntityRefernce nodes are ignored.)
7    
8     my $RDF_URI = q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>;
9    
10     sub new ($) {
11     my $self = bless {fact_level => 'm', grammer_level => 'm'}, shift;
12     $self->{onerror} = sub {
13     my %opt = @_;
14     warn $opt{type}, "\n";
15     };
16     $self->{ontriple} = sub {
17     my %opt = @_;
18     my $dump_resource = sub {
19     my $resource = shift;
20     if ($resource->{uri}) {
21     return '<' . $resource->{uri} . '>';
22     } elsif ($resource->{bnodeid}) {
23     return '_:' . $resource->{bnodeid};
24     } elsif ($resource->{nodes}) {
25     return '"' . join ('', map {$_->inner_html} @{$resource->{nodes}}) .
26     '"^^<' . $resource->{datatype} . '>';
27     } elsif (defined $resource->{value}) {
28     return '"' . $resource->{value} . '"' .
29     (defined $resource->{datatype}
30     ? '^^<' . $resource->{datatype} . '>'
31     : '@' . $resource->{language});
32     } else {
33     return '??';
34     }
35     };
36     print STDERR $dump_resource->($opt{subject}) . ' ';
37     print STDERR $dump_resource->($opt{predicate}) . ' ';
38     print STDERR $dump_resource->($opt{object}) . "\n";
39     };
40     return $self;
41     } # new
42    
43     sub convert_document ($$) {
44     my $self = shift;
45     my $node = shift; # Document
46    
47     ## ISSUE: An RDF/XML document, either |doc| or |nodeElement|
48     ## is allowed as a starting production. However, |nodeElement|
49     ## is not a Root Event.
50    
51     my $has_element;
52    
53     for my $cn (@{$node->child_nodes}) {
54     if ($cn->node_type == $cn->ELEMENT_NODE) {
55     unless ($has_element) {
56     if ($cn->manakai_expanded_uri eq $RDF_URI . q<RDF>) {
57     $self->convert_rdf_element ($cn);
58     } else {
59     $self->convert_rdf_node_element ($cn);
60     }
61     $has_element = 1;
62     } else {
63     $self->{onerror}->(type => 'second node element',
64     level => $self->{grammer_level},
65     node => $cn);
66     }
67     } elsif ($cn->node_type == $cn->TEXT_NODE or
68     $cn->node_type == $cn->CDATA_SECTION_NODE) {
69     $self->{onerror}->(type => 'character not allowed',
70     level => $self->{grammer_level},
71     node => $cn);
72     }
73     }
74     } # convert_document
75    
76     my $check_rdf_namespace = sub {
77     my $self = shift;
78     my $node = shift;
79     my $node_nsuri = $node->namespace_uri;
80     return unless defined $node_nsuri;
81     if (substr ($node_nsuri, 0, length $RDF_URI) eq $RDF_URI and
82     length $RDF_URI < length $node_nsuri) {
83     $self->{onerror}->(type => 'bad rdf namespace',
84     level => $self->{fact_level}, # Section 5.1
85     node => $node);
86     }
87     }; # $check_rdf_namespace
88    
89     sub convert_rdf_element ($$) {
90     my ($self, $node) = @_;
91    
92     $check_rdf_namespace->($self, $node);
93    
94     # |RDF|
95    
96     for my $attr (@{$node->attributes}) {
97     my $prefix = $attr->prefix;
98     if (defined $prefix) {
99     next if $prefix =~ /^[Xx][Mm][Ll]/;
100     } else {
101     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
102     }
103    
104     $check_rdf_namespace->($self, $attr);
105     $self->{onerror}->(type => 'attribute not allowed',
106     level => $self->{grammer_level},
107     node => $attr);
108     }
109    
110     # |nodeElementList|
111     for my $cn (@{$node->child_nodes}) {
112     if ($cn->node_type == $cn->ELEMENT_NODE) {
113     $self->convert_node_element ($cn);
114     } elsif ($cn->node_type == $cn->TEXT_NODE or
115     $cn->node_type == $cn->CDATA_SECTION_NODE) {
116     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
117     $self->{onerror}->(type => 'character not allowed',
118     level => $self->{grammer_level},
119     node => $cn);
120     }
121     }
122     }
123     } # convert_rdf_element
124    
125     my %coreSyntaxTerms = (
126     $RDF_URI . 'RDF' => 1,
127     $RDF_URI . 'ID' => 1,
128     $RDF_URI . 'about' => 1,
129     $RDF_URI . 'parseType' => 1,
130     $RDF_URI . 'resource' => 1,
131     $RDF_URI . 'nodeID' => 1,
132     $RDF_URI . 'datatype' => 1,
133     );
134    
135     my %oldTerms = (
136     $RDF_URI . 'aboutEach' => 1,
137     $RDF_URI . 'aboutEachPrefix' => 1,
138     $RDF_URI . 'bagID' => 1,
139     );
140    
141     sub convert_node_element ($$) {
142     my ($self, $node) = @_;
143    
144     $check_rdf_namespace->($self, $node);
145    
146     # |nodeElement|
147    
148     my $xuri = $node->manakai_expanded_uri;
149    
150     if ({
151     %coreSyntaxTerms,
152     $RDF_URI . 'li' => 1,
153     %oldTerms,
154     }->{$xuri}) {
155     $self->{onerror}->(type => 'element not allowed',
156     level => $self->{grammer_level},
157     node => $node);
158    
159     ## TODO: W3C RDF Validator: Continue validation, but triples that would
160     ## be generated from the subtree are ignored.
161     }
162    
163     my $subject;
164     my $rdf_type_attr;
165     my @prop_attr;
166    
167     for my $attr (@{$node->attributes}) {
168     my $prefix = $attr->prefix;
169     if (defined $prefix) {
170     next if $prefix =~ /^[Xx][Mm][Ll]/;
171     } else {
172     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
173     }
174    
175     $check_rdf_namespace->($self, $attr);
176    
177     my $attr_xuri = $attr->manakai_expanded_uri;
178     if ($attr_xuri eq $RDF_URI . 'id') {
179     unless (defined $subject) {
180     $subject = {uri => '#' . $attr->value}; ## TODO: resolve()
181     } else {
182     ## TODO: Ignore triple as W3C RDF Validator does
183     }
184     } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
185     unless (defined $subject) {
186     $subject = {bnodeid => '## TODO: bnode: ' . $attr->value};
187     } else {
188     ## TODO: Ignore triple as W3C RDF Validator does
189     }
190     } elsif ($attr_xuri eq $RDF_URI . 'about') {
191     unless (defined $subject) {
192     $subject = {uri => $attr->value}; ## TODO: resolve
193     } else {
194     ## TODO: Ignore triple as W3C RDF Validator does
195     }
196     } elsif ($attr_xuri eq $RDF_URI . 'type') {
197     $rdf_type_attr = $attr;
198     } elsif ({
199     %coreSyntaxTerms,
200     $RDF_URI . 'li' => 1,
201     $RDF_URI . 'Description' => 1,
202     %oldTerms,
203     }->{$attr_xuri}) {
204     $self->{onerror}->(type => 'attribute not allowed',
205     level => $self->{grammer_level},
206     node => $attr);
207    
208     ## TODO: W3C RDF Validator: Ignore triples
209     } else {
210     push @prop_attr, $attr;
211     }
212     }
213    
214     unless (defined $subject) {
215     $subject = {bnodeid => '## TODO: new bnodeid'};
216     }
217    
218     if ($xuri ne $RDF_URI . 'Description') {
219     $self->{ontriple}->(subject => $subject,
220     predicate => {uri => $RDF_URI . 'type'},
221     object => {uri => $xuri});
222     }
223    
224     if ($rdf_type_attr) {
225     $self->{ontriple}->(subject => $subject,
226     predicate => {uri => $RDF_URI . 'type'},
227     object => {uri => $rdf_type_attr->value}); ## TODO: resolve
228     }
229    
230     for my $attr (@prop_attr) {
231     $self->{ontriple}->(subject => $subject,
232     predicate => {uri => $attr->manakai_expanded_uri},
233     object => {value => $attr->value}); ## TODO: language
234     ## TODO: SHOULD in NFC
235     }
236    
237     # |propertyEltList|
238    
239     my $li_counter = 1;
240     for my $cn (@{$node->child_nodes}) {
241     my $cn_type = $cn->node_type;
242     if ($cn_type == $cn->ELEMENT_NODE) {
243     $self->convert_property_element ($cn, li_counter => \$li_counter,
244     subject => $subject);
245     } elsif ($cn_type == $cn->TEXT_NODE or
246     $cn_type == $cn->CDATA_SECTION_NODE) {
247     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
248     $self->{onerror}->(type => 'character not allowed',
249     level => $self->{grammer_level},
250     node => $cn);
251     }
252     }
253     }
254    
255     return $subject;
256     } # convert_node_element
257    
258     sub convert_property_element ($$%) {
259     my ($self, $node, %opt) = @_;
260    
261     $check_rdf_namespace->($self, $node);
262    
263     # |propertyElt|
264    
265     my $xuri = $node->manakai_expanded_uri;
266     if ($xuri eq $RDF_URI . 'li') {
267     $xuri = $RDF_URI . '_' . ${$opt{li_counter}}++;
268     }
269    
270     if ({
271     %coreSyntaxTerms,
272     $RDF_URI . 'Description' => 1,
273     %oldTerms,
274     }->{$xuri}) {
275     $self->{onerror}->(type => 'element not allowed',
276     level => $self->{grammer_level},
277     node => $node);
278     ## TODO: RDF Validator?
279     }
280    
281     my $id_attr;
282     my $dt_attr;
283     my $parse_attr;
284     my $nodeid_attr;
285     my $resource_attr;
286     my @prop_attr;
287     for my $attr (@{$node->attributes}) {
288     my $prefix = $attr->prefix;
289     if (defined $prefix) {
290     next if $prefix =~ /^[Xx][Mm][Ll]/;
291     } else {
292     next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
293     }
294    
295     $check_rdf_namespace->($self, $attr);
296    
297     my $attr_xuri = $attr->manakai_expanded_uri;
298     if ($attr_xuri eq $RDF_URI . 'ID') {
299     $id_attr = $attr;
300     } elsif ($attr_xuri eq $RDF_URI . 'datatype') {
301     $dt_attr = $attr;
302     } elsif ($attr_xuri eq $RDF_URI . 'parseType') {
303     $parse_attr = $attr;
304     } elsif ($attr_xuri eq $RDF_URI . 'resource') {
305     $resource_attr = $attr;
306     } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
307     $nodeid_attr = $attr;
308     } elsif ({
309     %coreSyntaxTerms,
310     $RDF_URI . 'li' => 1,
311     $RDF_URI . 'Description' => 1,
312     %oldTerms,
313     }->{$attr_xuri}) {
314     $self->{onerror}->(type => 'attribute not allowed',
315     level => $self->{grammer_level},
316     node => $attr);
317     ## TODO: RDF Validator?
318     } else {
319     push @prop_attr, $attr;
320     }
321     }
322    
323     my $parse = $parse_attr ? $parse_attr->value : '';
324     if ($parse eq 'Resource') {
325     # |parseTypeResourcePropertyElt|
326    
327     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
328     $self->{onerror}->(type => 'attribute not allowed',
329     level => $self->{grammer_level},
330     node => $attr);
331     ## TODO: RDF Validator?
332     }
333    
334     my $object = {bnodeid => '## TODO: generate bnodeid'};
335     $self->{ontriple}->(subject => $opt{subject},
336     predicate => {uri => $xuri},
337     object => $object);
338     ## TODO: reification
339    
340     ## As if nodeElement
341    
342     # |propertyEltList|
343    
344     my $li_counter = 1;
345     for my $cn (@{$node->child_nodes}) {
346     my $cn_type = $cn->node_type;
347     if ($cn_type == $cn->ELEMENT_NODE) {
348     $self->convert_property_element ($cn, li_counter => \$li_counter,
349     subject => $object);
350     } elsif ($cn_type == $cn->TEXT_NODE or
351     $cn_type == $cn->CDATA_SECTION_NODE) {
352     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
353     $self->{onerror}->(type => 'character not allowed',
354     level => $self->{grammer_level},
355     node => $cn);
356     }
357     }
358     }
359     } elsif ($parse eq 'Collection') {
360     # |parseTypeCollectionPropertyElt|
361    
362     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
363     $self->{onerror}->(type => 'attribute not allowed',
364     level => $self->{grammer_level},
365     node => $attr);
366     ## TODO: RDF Validator?
367     }
368    
369     # |nodeElementList|
370     my @resource;
371     for my $cn (@{$node->child_nodes}) {
372     if ($cn->node_type == $cn->ELEMENT_NODE) {
373     push @resource, [$self->convert_node_element ($cn),
374     {bnodeid => '## TODO: bnodeid generated'}];
375     } elsif ($cn->node_type == $cn->TEXT_NODE or
376     $cn->node_type == $cn->CDATA_SECTION_NODE) {
377     if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
378     $self->{onerror}->(type => 'character not allowed',
379     level => $self->{grammer_level},
380     node => $cn);
381     }
382     }
383     }
384    
385     if (@resource) {
386     $self->{ontriple}->(subject => $opt{subject},
387     predicate => {uri => $xuri},
388     object => $resource[0]->[1]);
389     } else {
390     $self->{ontriple}->(subject => $opt{subject},
391     predicate => {uri => $xuri},
392     object => {uri => $RDF_URI . 'nil'});
393     }
394     ## TODO: reification
395    
396     while (@resource) {
397     my $resource = shift @resource;
398     $self->{ontriple}->(subject => $resource->[1],
399     predicate => {uri => $RDF_URI . 'first'},
400     object => $resource->[0]);
401     if (@resource) {
402     $self->{ontriple}->(subject => $resource->[1],
403     predicate => {uri => $RDF_URI . 'rest'},
404     object => $resource[0]->[1]);
405     } else {
406     $self->{ontriple}->(subject => $resource->[1],
407     predicate => {uri => $RDF_URI . 'rest'},
408     object => {uri => $RDF_URI . 'nil'});
409     }
410     }
411     } elsif ($parse_attr) {
412     # |parseTypeLiteralPropertyElt|
413     # |parseTypeOtherPropertyElt| ## TODO: What RDF Validator does?
414    
415     for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
416     $self->{onerror}->(type => 'attribute not allowed',
417     level => $self->{grammer_level},
418     node => $attr);
419     ## TODO: RDF Validator?
420     }
421    
422     my $value = [@{$node->child_nodes}];
423     ## TODO: Callback for validation
424     ## TODO: Serialized form SHOULD be in NFC.
425    
426     $self->{ontriple}->(subject => $opt{subject},
427     predicate => {uri => $xuri},
428     object => {nodes => $value,
429     datatype => $RDF_URI . 'XMLLiteral'});
430     ## TODO: reification
431     } else {
432     my $mode = 'unknown';
433    
434     if ($dt_attr) {
435     $mode = 'literal'; # |literalPropertyElt|
436     ## TODO: What RDF Validator does for |< rdf:datatype><el/></>|?
437     }
438     ## TODO: What RDF Validator does for |< prop-attr><non-empty/></>|?
439    
440     my $node_element;
441     my $text = '';
442     for my $cn (@{$node->child_nodes}) {
443     my $cn_type = $cn->node_type;
444     if ($cn_type == $cn->ELEMENT_NODE) {
445     unless ($node_element) {
446     $node_element = $cn;
447     if ({
448     resource => 1, unknown => 1, 'literal-or-resource' => 1,
449     }->{$mode}) {
450     $mode = 'resource';
451     } else {
452     $self->{onerror}->(type => 'element not allowed',
453     level => $self->{grammer_level},
454     node => $cn);
455     ## TODO: RDF Validator?
456     }
457     } else {
458     ## TODO: What RDF Validator does?
459     $self->{onerror}->(type => 'second node element',
460     level => $self->{grammer_level},
461     node => $cn);
462     }
463     } elsif ($cn_type == $cn->TEXT_NODE or
464     $cn_type == $cn->CDATA_SECTION_NODE) {
465     my $data = $cn->data;
466     $text .= $data;
467     if ($data =~ /[^\x09\x0A\x0D\x20]/) {
468     if ({
469     literal => 1, unknown => 1, 'literal-or-resource' => 1,
470     }->{$mode}) {
471     $mode = 'literal';
472     } else {
473     $self->{onerror}->(type => 'character not allowed',
474     level => $self->{grammer_level},
475     node => $cn);
476     ## TODO: RDF Validator?
477     }
478     } else {
479     if ($mode eq 'unknown') {
480     $mode = 'literal-or-resource';
481     } else {
482     #
483     }
484     }
485     }
486     }
487    
488     if ($mode eq 'resource') {
489     # |resourcePropertyElt|
490    
491     for my $attr (@prop_attr, $resource_attr, $nodeid_attr, $dt_attr) {
492     $self->{onerror}->(type => 'attribute not allowed',
493     level => $self->{grammer_level},
494     node => $attr);
495     ## TODO: RDF Validator?
496     }
497    
498     my $object = $self->convert_node_element ($node_element);
499    
500     $self->{ontriple}->(subject => $opt{subject},
501     predicate => {uri => $xuri},
502     object => $object);
503    
504     ## TODO: reification
505     } elsif ($mode eq 'literal' or $mode eq 'literal-or-resource') {
506     # |literalPropertyElt|
507    
508     for my $attr (@prop_attr, $resource_attr, $nodeid_attr) {
509     $self->{onerror}->(type => 'attribute not allowed',
510     level => $self->{grammer_level},
511     node => $attr);
512     ## TODO: RDF Validator?
513     }
514    
515     ## TODO: $text SHOULD be in NFC
516    
517     if ($dt_attr) {
518     $self->{ontriple}->(subject => $opt{subject},
519     predicate => {uri => $xuri},
520     object => {value => $text,
521     datatype => $dt_attr->value});
522     } else {
523     $self->{ontriple}->(subject => $opt{subject},
524     predicate => {uri => $xuri},
525     object => {value => $text,
526     ## TODO: language
527     });
528     }
529    
530     ## TODO: reification
531     } else {
532     ## |emptyPropertyElt|
533    
534     for my $attr ($dt_attr) {
535     $self->{onerror}->(type => 'attribute not allowed',
536     level => $self->{grammer_level},
537     node => $attr);
538     ## TODO: RDF Validator?
539     }
540    
541     if (not $resource_attr and not $nodeid_attr and not @prop_attr) {
542     $self->{ontriple}->(subject => $opt{subject},
543     predicate => {uri => $xuri},
544     object => {value => '',
545     ## TODO: language
546     });
547    
548     ## TODO: reification
549     } else {
550     my $object;
551     if ($resource_attr) {
552     $object = {uri => $resource_attr->value}; ## TODO: resolve
553     } elsif ($nodeid_attr) {
554     $object = {bnodeid => $nodeid_attr->value};
555     } else {
556     $object = {bnodeid => '## TODO: generated bnodeid'};
557     }
558    
559     for my $attr (@prop_attr) {
560     my $attr_xuri = $attr->manakai_expanded_uri;
561     if ($attr_xuri eq $RDF_URI . 'type') {
562     $self->{ontriple}->(subject => $object,
563     predicate => {uri => $attr_xuri},
564     object => $attr->value); ## TODO: resolve
565     } else {
566     ## TODO: SHOULD be in NFC
567     $self->{ontriple}->(subject => $object,
568     predicate => {uri => $attr_xuri},
569     object => {value => $attr->value,
570     ## TODO: lang
571     });
572     }
573     }
574    
575     $self->{ontriple}->(subject => $opt{subject},
576     predicate => {uri => $xuri},
577     object => $object);
578    
579     ## TODO: reification
580     }
581     }
582     }
583     } # convert_property_element
584    
585     1;
586    

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24