/[suikacvs]/markup/html/whatpm/Whatpm/RDFXML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/RDFXML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations) (download)
Fri Mar 21 07:56:48 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
++ whatpm/Whatpm/ChangeLog	21 Mar 2008 07:56:31 -0000
2008-03-21  Wakaba  <wakaba@suika.fam.cx>

	* RDFXML.pm: New module.

1 package Whatpm::RDFXML;
2 use strict;
3
4 ## NOTE: EntityReference nodes are not supported except that
5 ## unexpected entity reference are simply ignored. (In fact
6 ## all EntityRefernce nodes are ignored.)
7
8 my $RDF_URI = q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>;
9
10 sub new ($) {
11 my $self = bless {fact_level => 'm', grammer_level => 'm'}, shift;
12 $self->{onerror} = sub {
13 my %opt = @_;
14 warn $opt{type}, "\n";
15 };
16 $self->{ontriple} = sub {
17 my %opt = @_;
18 my $dump_resource = sub {
19 my $resource = shift;
20 if ($resource->{uri}) {
21 return '<' . $resource->{uri} . '>';
22 } elsif ($resource->{bnodeid}) {
23 return '_:' . $resource->{bnodeid};
24 } elsif ($resource->{nodes}) {
25 return '"' . join ('', map {$_->inner_html} @{$resource->{nodes}}) .
26 '"^^<' . $resource->{datatype} . '>';
27 } elsif (defined $resource->{value}) {
28 return '"' . $resource->{value} . '"' .
29 (defined $resource->{datatype}
30 ? '^^<' . $resource->{datatype} . '>'
31 : '@' . $resource->{language});
32 } else {
33 return '??';
34 }
35 };
36 print STDERR $dump_resource->($opt{subject}) . ' ';
37 print STDERR $dump_resource->($opt{predicate}) . ' ';
38 print STDERR $dump_resource->($opt{object}) . "\n";
39 };
40 return $self;
41 } # new
42
43 sub convert_document ($$) {
44 my $self = shift;
45 my $node = shift; # Document
46
47 ## ISSUE: An RDF/XML document, either |doc| or |nodeElement|
48 ## is allowed as a starting production. However, |nodeElement|
49 ## is not a Root Event.
50
51 my $has_element;
52
53 for my $cn (@{$node->child_nodes}) {
54 if ($cn->node_type == $cn->ELEMENT_NODE) {
55 unless ($has_element) {
56 if ($cn->manakai_expanded_uri eq $RDF_URI . q<RDF>) {
57 $self->convert_rdf_element ($cn);
58 } else {
59 $self->convert_rdf_node_element ($cn);
60 }
61 $has_element = 1;
62 } else {
63 $self->{onerror}->(type => 'second node element',
64 level => $self->{grammer_level},
65 node => $cn);
66 }
67 } elsif ($cn->node_type == $cn->TEXT_NODE or
68 $cn->node_type == $cn->CDATA_SECTION_NODE) {
69 $self->{onerror}->(type => 'character not allowed',
70 level => $self->{grammer_level},
71 node => $cn);
72 }
73 }
74 } # convert_document
75
76 my $check_rdf_namespace = sub {
77 my $self = shift;
78 my $node = shift;
79 my $node_nsuri = $node->namespace_uri;
80 return unless defined $node_nsuri;
81 if (substr ($node_nsuri, 0, length $RDF_URI) eq $RDF_URI and
82 length $RDF_URI < length $node_nsuri) {
83 $self->{onerror}->(type => 'bad rdf namespace',
84 level => $self->{fact_level}, # Section 5.1
85 node => $node);
86 }
87 }; # $check_rdf_namespace
88
89 sub convert_rdf_element ($$) {
90 my ($self, $node) = @_;
91
92 $check_rdf_namespace->($self, $node);
93
94 # |RDF|
95
96 for my $attr (@{$node->attributes}) {
97 my $prefix = $attr->prefix;
98 if (defined $prefix) {
99 next if $prefix =~ /^[Xx][Mm][Ll]/;
100 } else {
101 next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
102 }
103
104 $check_rdf_namespace->($self, $attr);
105 $self->{onerror}->(type => 'attribute not allowed',
106 level => $self->{grammer_level},
107 node => $attr);
108 }
109
110 # |nodeElementList|
111 for my $cn (@{$node->child_nodes}) {
112 if ($cn->node_type == $cn->ELEMENT_NODE) {
113 $self->convert_node_element ($cn);
114 } elsif ($cn->node_type == $cn->TEXT_NODE or
115 $cn->node_type == $cn->CDATA_SECTION_NODE) {
116 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
117 $self->{onerror}->(type => 'character not allowed',
118 level => $self->{grammer_level},
119 node => $cn);
120 }
121 }
122 }
123 } # convert_rdf_element
124
125 my %coreSyntaxTerms = (
126 $RDF_URI . 'RDF' => 1,
127 $RDF_URI . 'ID' => 1,
128 $RDF_URI . 'about' => 1,
129 $RDF_URI . 'parseType' => 1,
130 $RDF_URI . 'resource' => 1,
131 $RDF_URI . 'nodeID' => 1,
132 $RDF_URI . 'datatype' => 1,
133 );
134
135 my %oldTerms = (
136 $RDF_URI . 'aboutEach' => 1,
137 $RDF_URI . 'aboutEachPrefix' => 1,
138 $RDF_URI . 'bagID' => 1,
139 );
140
141 sub convert_node_element ($$) {
142 my ($self, $node) = @_;
143
144 $check_rdf_namespace->($self, $node);
145
146 # |nodeElement|
147
148 my $xuri = $node->manakai_expanded_uri;
149
150 if ({
151 %coreSyntaxTerms,
152 $RDF_URI . 'li' => 1,
153 %oldTerms,
154 }->{$xuri}) {
155 $self->{onerror}->(type => 'element not allowed',
156 level => $self->{grammer_level},
157 node => $node);
158
159 ## TODO: W3C RDF Validator: Continue validation, but triples that would
160 ## be generated from the subtree are ignored.
161 }
162
163 my $subject;
164 my $rdf_type_attr;
165 my @prop_attr;
166
167 for my $attr (@{$node->attributes}) {
168 my $prefix = $attr->prefix;
169 if (defined $prefix) {
170 next if $prefix =~ /^[Xx][Mm][Ll]/;
171 } else {
172 next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
173 }
174
175 $check_rdf_namespace->($self, $attr);
176
177 my $attr_xuri = $attr->manakai_expanded_uri;
178 if ($attr_xuri eq $RDF_URI . 'id') {
179 unless (defined $subject) {
180 $subject = {uri => '#' . $attr->value}; ## TODO: resolve()
181 } else {
182 ## TODO: Ignore triple as W3C RDF Validator does
183 }
184 } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
185 unless (defined $subject) {
186 $subject = {bnodeid => '## TODO: bnode: ' . $attr->value};
187 } else {
188 ## TODO: Ignore triple as W3C RDF Validator does
189 }
190 } elsif ($attr_xuri eq $RDF_URI . 'about') {
191 unless (defined $subject) {
192 $subject = {uri => $attr->value}; ## TODO: resolve
193 } else {
194 ## TODO: Ignore triple as W3C RDF Validator does
195 }
196 } elsif ($attr_xuri eq $RDF_URI . 'type') {
197 $rdf_type_attr = $attr;
198 } elsif ({
199 %coreSyntaxTerms,
200 $RDF_URI . 'li' => 1,
201 $RDF_URI . 'Description' => 1,
202 %oldTerms,
203 }->{$attr_xuri}) {
204 $self->{onerror}->(type => 'attribute not allowed',
205 level => $self->{grammer_level},
206 node => $attr);
207
208 ## TODO: W3C RDF Validator: Ignore triples
209 } else {
210 push @prop_attr, $attr;
211 }
212 }
213
214 unless (defined $subject) {
215 $subject = {bnodeid => '## TODO: new bnodeid'};
216 }
217
218 if ($xuri ne $RDF_URI . 'Description') {
219 $self->{ontriple}->(subject => $subject,
220 predicate => {uri => $RDF_URI . 'type'},
221 object => {uri => $xuri});
222 }
223
224 if ($rdf_type_attr) {
225 $self->{ontriple}->(subject => $subject,
226 predicate => {uri => $RDF_URI . 'type'},
227 object => {uri => $rdf_type_attr->value}); ## TODO: resolve
228 }
229
230 for my $attr (@prop_attr) {
231 $self->{ontriple}->(subject => $subject,
232 predicate => {uri => $attr->manakai_expanded_uri},
233 object => {value => $attr->value}); ## TODO: language
234 ## TODO: SHOULD in NFC
235 }
236
237 # |propertyEltList|
238
239 my $li_counter = 1;
240 for my $cn (@{$node->child_nodes}) {
241 my $cn_type = $cn->node_type;
242 if ($cn_type == $cn->ELEMENT_NODE) {
243 $self->convert_property_element ($cn, li_counter => \$li_counter,
244 subject => $subject);
245 } elsif ($cn_type == $cn->TEXT_NODE or
246 $cn_type == $cn->CDATA_SECTION_NODE) {
247 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
248 $self->{onerror}->(type => 'character not allowed',
249 level => $self->{grammer_level},
250 node => $cn);
251 }
252 }
253 }
254
255 return $subject;
256 } # convert_node_element
257
258 sub convert_property_element ($$%) {
259 my ($self, $node, %opt) = @_;
260
261 $check_rdf_namespace->($self, $node);
262
263 # |propertyElt|
264
265 my $xuri = $node->manakai_expanded_uri;
266 if ($xuri eq $RDF_URI . 'li') {
267 $xuri = $RDF_URI . '_' . ${$opt{li_counter}}++;
268 }
269
270 if ({
271 %coreSyntaxTerms,
272 $RDF_URI . 'Description' => 1,
273 %oldTerms,
274 }->{$xuri}) {
275 $self->{onerror}->(type => 'element not allowed',
276 level => $self->{grammer_level},
277 node => $node);
278 ## TODO: RDF Validator?
279 }
280
281 my $id_attr;
282 my $dt_attr;
283 my $parse_attr;
284 my $nodeid_attr;
285 my $resource_attr;
286 my @prop_attr;
287 for my $attr (@{$node->attributes}) {
288 my $prefix = $attr->prefix;
289 if (defined $prefix) {
290 next if $prefix =~ /^[Xx][Mm][Ll]/;
291 } else {
292 next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
293 }
294
295 $check_rdf_namespace->($self, $attr);
296
297 my $attr_xuri = $attr->manakai_expanded_uri;
298 if ($attr_xuri eq $RDF_URI . 'ID') {
299 $id_attr = $attr;
300 } elsif ($attr_xuri eq $RDF_URI . 'datatype') {
301 $dt_attr = $attr;
302 } elsif ($attr_xuri eq $RDF_URI . 'parseType') {
303 $parse_attr = $attr;
304 } elsif ($attr_xuri eq $RDF_URI . 'resource') {
305 $resource_attr = $attr;
306 } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
307 $nodeid_attr = $attr;
308 } elsif ({
309 %coreSyntaxTerms,
310 $RDF_URI . 'li' => 1,
311 $RDF_URI . 'Description' => 1,
312 %oldTerms,
313 }->{$attr_xuri}) {
314 $self->{onerror}->(type => 'attribute not allowed',
315 level => $self->{grammer_level},
316 node => $attr);
317 ## TODO: RDF Validator?
318 } else {
319 push @prop_attr, $attr;
320 }
321 }
322
323 my $parse = $parse_attr ? $parse_attr->value : '';
324 if ($parse eq 'Resource') {
325 # |parseTypeResourcePropertyElt|
326
327 for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
328 $self->{onerror}->(type => 'attribute not allowed',
329 level => $self->{grammer_level},
330 node => $attr);
331 ## TODO: RDF Validator?
332 }
333
334 my $object = {bnodeid => '## TODO: generate bnodeid'};
335 $self->{ontriple}->(subject => $opt{subject},
336 predicate => {uri => $xuri},
337 object => $object);
338 ## TODO: reification
339
340 ## As if nodeElement
341
342 # |propertyEltList|
343
344 my $li_counter = 1;
345 for my $cn (@{$node->child_nodes}) {
346 my $cn_type = $cn->node_type;
347 if ($cn_type == $cn->ELEMENT_NODE) {
348 $self->convert_property_element ($cn, li_counter => \$li_counter,
349 subject => $object);
350 } elsif ($cn_type == $cn->TEXT_NODE or
351 $cn_type == $cn->CDATA_SECTION_NODE) {
352 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
353 $self->{onerror}->(type => 'character not allowed',
354 level => $self->{grammer_level},
355 node => $cn);
356 }
357 }
358 }
359 } elsif ($parse eq 'Collection') {
360 # |parseTypeCollectionPropertyElt|
361
362 for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
363 $self->{onerror}->(type => 'attribute not allowed',
364 level => $self->{grammer_level},
365 node => $attr);
366 ## TODO: RDF Validator?
367 }
368
369 # |nodeElementList|
370 my @resource;
371 for my $cn (@{$node->child_nodes}) {
372 if ($cn->node_type == $cn->ELEMENT_NODE) {
373 push @resource, [$self->convert_node_element ($cn),
374 {bnodeid => '## TODO: bnodeid generated'}];
375 } elsif ($cn->node_type == $cn->TEXT_NODE or
376 $cn->node_type == $cn->CDATA_SECTION_NODE) {
377 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
378 $self->{onerror}->(type => 'character not allowed',
379 level => $self->{grammer_level},
380 node => $cn);
381 }
382 }
383 }
384
385 if (@resource) {
386 $self->{ontriple}->(subject => $opt{subject},
387 predicate => {uri => $xuri},
388 object => $resource[0]->[1]);
389 } else {
390 $self->{ontriple}->(subject => $opt{subject},
391 predicate => {uri => $xuri},
392 object => {uri => $RDF_URI . 'nil'});
393 }
394 ## TODO: reification
395
396 while (@resource) {
397 my $resource = shift @resource;
398 $self->{ontriple}->(subject => $resource->[1],
399 predicate => {uri => $RDF_URI . 'first'},
400 object => $resource->[0]);
401 if (@resource) {
402 $self->{ontriple}->(subject => $resource->[1],
403 predicate => {uri => $RDF_URI . 'rest'},
404 object => $resource[0]->[1]);
405 } else {
406 $self->{ontriple}->(subject => $resource->[1],
407 predicate => {uri => $RDF_URI . 'rest'},
408 object => {uri => $RDF_URI . 'nil'});
409 }
410 }
411 } elsif ($parse_attr) {
412 # |parseTypeLiteralPropertyElt|
413 # |parseTypeOtherPropertyElt| ## TODO: What RDF Validator does?
414
415 for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
416 $self->{onerror}->(type => 'attribute not allowed',
417 level => $self->{grammer_level},
418 node => $attr);
419 ## TODO: RDF Validator?
420 }
421
422 my $value = [@{$node->child_nodes}];
423 ## TODO: Callback for validation
424 ## TODO: Serialized form SHOULD be in NFC.
425
426 $self->{ontriple}->(subject => $opt{subject},
427 predicate => {uri => $xuri},
428 object => {nodes => $value,
429 datatype => $RDF_URI . 'XMLLiteral'});
430 ## TODO: reification
431 } else {
432 my $mode = 'unknown';
433
434 if ($dt_attr) {
435 $mode = 'literal'; # |literalPropertyElt|
436 ## TODO: What RDF Validator does for |< rdf:datatype><el/></>|?
437 }
438 ## TODO: What RDF Validator does for |< prop-attr><non-empty/></>|?
439
440 my $node_element;
441 my $text = '';
442 for my $cn (@{$node->child_nodes}) {
443 my $cn_type = $cn->node_type;
444 if ($cn_type == $cn->ELEMENT_NODE) {
445 unless ($node_element) {
446 $node_element = $cn;
447 if ({
448 resource => 1, unknown => 1, 'literal-or-resource' => 1,
449 }->{$mode}) {
450 $mode = 'resource';
451 } else {
452 $self->{onerror}->(type => 'element not allowed',
453 level => $self->{grammer_level},
454 node => $cn);
455 ## TODO: RDF Validator?
456 }
457 } else {
458 ## TODO: What RDF Validator does?
459 $self->{onerror}->(type => 'second node element',
460 level => $self->{grammer_level},
461 node => $cn);
462 }
463 } elsif ($cn_type == $cn->TEXT_NODE or
464 $cn_type == $cn->CDATA_SECTION_NODE) {
465 my $data = $cn->data;
466 $text .= $data;
467 if ($data =~ /[^\x09\x0A\x0D\x20]/) {
468 if ({
469 literal => 1, unknown => 1, 'literal-or-resource' => 1,
470 }->{$mode}) {
471 $mode = 'literal';
472 } else {
473 $self->{onerror}->(type => 'character not allowed',
474 level => $self->{grammer_level},
475 node => $cn);
476 ## TODO: RDF Validator?
477 }
478 } else {
479 if ($mode eq 'unknown') {
480 $mode = 'literal-or-resource';
481 } else {
482 #
483 }
484 }
485 }
486 }
487
488 if ($mode eq 'resource') {
489 # |resourcePropertyElt|
490
491 for my $attr (@prop_attr, $resource_attr, $nodeid_attr, $dt_attr) {
492 $self->{onerror}->(type => 'attribute not allowed',
493 level => $self->{grammer_level},
494 node => $attr);
495 ## TODO: RDF Validator?
496 }
497
498 my $object = $self->convert_node_element ($node_element);
499
500 $self->{ontriple}->(subject => $opt{subject},
501 predicate => {uri => $xuri},
502 object => $object);
503
504 ## TODO: reification
505 } elsif ($mode eq 'literal' or $mode eq 'literal-or-resource') {
506 # |literalPropertyElt|
507
508 for my $attr (@prop_attr, $resource_attr, $nodeid_attr) {
509 $self->{onerror}->(type => 'attribute not allowed',
510 level => $self->{grammer_level},
511 node => $attr);
512 ## TODO: RDF Validator?
513 }
514
515 ## TODO: $text SHOULD be in NFC
516
517 if ($dt_attr) {
518 $self->{ontriple}->(subject => $opt{subject},
519 predicate => {uri => $xuri},
520 object => {value => $text,
521 datatype => $dt_attr->value});
522 } else {
523 $self->{ontriple}->(subject => $opt{subject},
524 predicate => {uri => $xuri},
525 object => {value => $text,
526 ## TODO: language
527 });
528 }
529
530 ## TODO: reification
531 } else {
532 ## |emptyPropertyElt|
533
534 for my $attr ($dt_attr) {
535 $self->{onerror}->(type => 'attribute not allowed',
536 level => $self->{grammer_level},
537 node => $attr);
538 ## TODO: RDF Validator?
539 }
540
541 if (not $resource_attr and not $nodeid_attr and not @prop_attr) {
542 $self->{ontriple}->(subject => $opt{subject},
543 predicate => {uri => $xuri},
544 object => {value => '',
545 ## TODO: language
546 });
547
548 ## TODO: reification
549 } else {
550 my $object;
551 if ($resource_attr) {
552 $object = {uri => $resource_attr->value}; ## TODO: resolve
553 } elsif ($nodeid_attr) {
554 $object = {bnodeid => $nodeid_attr->value};
555 } else {
556 $object = {bnodeid => '## TODO: generated bnodeid'};
557 }
558
559 for my $attr (@prop_attr) {
560 my $attr_xuri = $attr->manakai_expanded_uri;
561 if ($attr_xuri eq $RDF_URI . 'type') {
562 $self->{ontriple}->(subject => $object,
563 predicate => {uri => $attr_xuri},
564 object => $attr->value); ## TODO: resolve
565 } else {
566 ## TODO: SHOULD be in NFC
567 $self->{ontriple}->(subject => $object,
568 predicate => {uri => $attr_xuri},
569 object => {value => $attr->value,
570 ## TODO: lang
571 });
572 }
573 }
574
575 $self->{ontriple}->(subject => $opt{subject},
576 predicate => {uri => $xuri},
577 object => $object);
578
579 ## TODO: reification
580 }
581 }
582 }
583 } # convert_property_element
584
585 1;
586

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24