/[suikacvs]/markup/html/whatpm/Whatpm/RDFXML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/RDFXML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations) (download)
Fri Mar 21 09:18:40 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +33 -22 lines
*** empty log message ***

1 package Whatpm::RDFXML;
2 use strict;
3
4 ## NOTE: EntityReference nodes are not supported except that
5 ## unexpected entity reference are simply ignored. (In fact
6 ## all EntityRefernce nodes are ignored.)
7
8 ## TODO: Add a callback function invoked for every element
9 ## when XMLCC is implemented in WDCC.
10
11 ## ISSUE: <html:nest/> in RDF subtree?
12
13 ## ISSUE: PIs in RDF subtree should be validated?
14
15 my $RDF_URI = q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>;
16
17 sub new ($) {
18 my $self = bless {fact_level => 'm', grammer_level => 'm'}, shift;
19 $self->{onerror} = sub {
20 my %opt = @_;
21 warn $opt{type}, "\n";
22 };
23 $self->{ontriple} = sub {
24 my %opt = @_;
25 my $dump_resource = sub {
26 my $resource = shift;
27 if (defined $resource->{uri}) {
28 return '<' . $resource->{uri} . '>';
29 } elsif (defined $resource->{bnodeid}) {
30 return '_:' . $resource->{bnodeid};
31 } elsif ($resource->{nodes}) {
32 return '"' . join ('', map {$_->inner_html} @{$resource->{nodes}}) .
33 '"^^<' . $resource->{datatype} . '>';
34 } elsif (defined $resource->{value}) {
35 return '"' . $resource->{value} . '"' .
36 (defined $resource->{datatype}
37 ? '^^<' . $resource->{datatype} . '>'
38 : '@' . $resource->{language});
39 } else {
40 return '??';
41 }
42 };
43 print STDERR $dump_resource->($opt{subject}) . ' ';
44 print STDERR $dump_resource->($opt{predicate}) . ' ';
45 print STDERR $dump_resource->($opt{object}) . "\n";
46 if ($dump_resource->{id}) {
47 print STDERR $dump_resource->($dump_resource->{id}) . ' ';
48 print STDERR $dump_resource->({uri => $RDF_URI . 'subject'}) . ' ';
49 print STDERR $dump_resource->($opt{subject}) . "\n";
50 print STDERR $dump_resource->($dump_resource->{id}) . ' ';
51 print STDERR $dump_resource->({uri => $RDF_URI . 'predicate'}) . ' ';
52 print STDERR $dump_resource->($opt{predicate}) . "\n";
53 print STDERR $dump_resource->($dump_resource->{id}) . ' ';
54 print STDERR $dump_resource->({uri => $RDF_URI . 'object'}) . ' ';
55 print STDERR $dump_resource->($opt{object}) . "\n";
56 print STDERR $dump_resource->($dump_resource->{id}) . ' ';
57 print STDERR $dump_resource->({uri => $RDF_URI . 'type'}) . ' ';
58 print STDERR $dump_resource->({uri => $RDF_URI . 'Statement'}) . "\n";
59 }
60 };
61 return $self;
62 } # new
63
64 sub convert_document ($$) {
65 my $self = shift;
66 my $node = shift; # Document
67
68 ## ISSUE: An RDF/XML document, either |doc| or |nodeElement|
69 ## is allowed as a starting production. However, |nodeElement|
70 ## is not a Root Event.
71
72 my $has_element;
73
74 for my $cn (@{$node->child_nodes}) {
75 if ($cn->node_type == $cn->ELEMENT_NODE) {
76 unless ($has_element) {
77 if ($cn->manakai_expanded_uri eq $RDF_URI . q<RDF>) {
78 $self->convert_rdf_element ($cn);
79 } else {
80 $self->convert_rdf_node_element ($cn);
81 }
82 $has_element = 1;
83 } else {
84 $self->{onerror}->(type => 'second node element',
85 level => $self->{grammer_level},
86 node => $cn);
87 }
88 } elsif ($cn->node_type == $cn->TEXT_NODE or
89 $cn->node_type == $cn->CDATA_SECTION_NODE) {
90 $self->{onerror}->(type => 'character not allowed',
91 level => $self->{grammer_level},
92 node => $cn);
93 }
94 }
95 } # convert_document
96
97 my $check_rdf_namespace = sub {
98 my $self = shift;
99 my $node = shift;
100 my $node_nsuri = $node->namespace_uri;
101 return unless defined $node_nsuri;
102 if (substr ($node_nsuri, 0, length $RDF_URI) eq $RDF_URI and
103 length $RDF_URI < length $node_nsuri) {
104 $self->{onerror}->(type => 'bad rdf namespace',
105 level => $self->{fact_level}, # Section 5.1
106 node => $node);
107 }
108 }; # $check_rdf_namespace
109
110 sub convert_rdf_element ($$) {
111 my ($self, $node) = @_;
112
113 $check_rdf_namespace->($self, $node);
114
115 # |RDF|
116
117 for my $attr (@{$node->attributes}) {
118 my $prefix = $attr->prefix;
119 if (defined $prefix) {
120 next if $prefix =~ /^[Xx][Mm][Ll]/;
121 } else {
122 next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
123 }
124
125 $check_rdf_namespace->($self, $attr);
126 $self->{onerror}->(type => 'attribute not allowed',
127 level => $self->{grammer_level},
128 node => $attr);
129 }
130
131 # |nodeElementList|
132 for my $cn (@{$node->child_nodes}) {
133 if ($cn->node_type == $cn->ELEMENT_NODE) {
134 $self->convert_node_element ($cn);
135 } elsif ($cn->node_type == $cn->TEXT_NODE or
136 $cn->node_type == $cn->CDATA_SECTION_NODE) {
137 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
138 $self->{onerror}->(type => 'character not allowed',
139 level => $self->{grammer_level},
140 node => $cn);
141 }
142 }
143 }
144 } # convert_rdf_element
145
146 my %coreSyntaxTerms = (
147 $RDF_URI . 'RDF' => 1,
148 $RDF_URI . 'ID' => 1,
149 $RDF_URI . 'about' => 1,
150 $RDF_URI . 'parseType' => 1,
151 $RDF_URI . 'resource' => 1,
152 $RDF_URI . 'nodeID' => 1,
153 $RDF_URI . 'datatype' => 1,
154 );
155
156 my %oldTerms = (
157 $RDF_URI . 'aboutEach' => 1,
158 $RDF_URI . 'aboutEachPrefix' => 1,
159 $RDF_URI . 'bagID' => 1,
160 );
161
162 sub convert_node_element ($$) {
163 my ($self, $node) = @_;
164
165 $check_rdf_namespace->($self, $node);
166
167 # |nodeElement|
168
169 my $xuri = $node->manakai_expanded_uri;
170
171 if ({
172 %coreSyntaxTerms,
173 $RDF_URI . 'li' => 1,
174 %oldTerms,
175 }->{$xuri}) {
176 $self->{onerror}->(type => 'element not allowed',
177 level => $self->{grammer_level},
178 node => $node);
179
180 ## TODO: W3C RDF Validator: Continue validation, but triples that would
181 ## be generated from the subtree are ignored.
182 }
183
184 my $subject;
185 my $rdf_type_attr;
186 my @prop_attr;
187
188 for my $attr (@{$node->attributes}) {
189 my $prefix = $attr->prefix;
190 if (defined $prefix) {
191 next if $prefix =~ /^[Xx][Mm][Ll]/;
192 } else {
193 next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
194 }
195
196 $check_rdf_namespace->($self, $attr);
197
198 my $attr_xuri = $attr->manakai_expanded_uri;
199 if ($attr_xuri eq $RDF_URI . 'ID') {
200 unless (defined $subject) {
201 $subject = {uri => '#' . $attr->value}; ## TODO: resolve()
202 } else {
203 ## TODO: Ignore triple as W3C RDF Validator does
204 }
205 } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
206 unless (defined $subject) {
207 $subject = {bnodeid => '## TODO: bnode: ' . $attr->value};
208 } else {
209 ## TODO: Ignore triple as W3C RDF Validator does
210 }
211 } elsif ($attr_xuri eq $RDF_URI . 'about') {
212 unless (defined $subject) {
213 $subject = {uri => $attr->value}; ## TODO: resolve
214 } else {
215 ## TODO: Ignore triple as W3C RDF Validator does
216 }
217 } elsif ($attr_xuri eq $RDF_URI . 'type') {
218 $rdf_type_attr = $attr;
219 } elsif ({
220 %coreSyntaxTerms,
221 $RDF_URI . 'li' => 1,
222 $RDF_URI . 'Description' => 1,
223 %oldTerms,
224 }->{$attr_xuri}) {
225 $self->{onerror}->(type => 'attribute not allowed',
226 level => $self->{grammer_level},
227 node => $attr);
228
229 ## TODO: W3C RDF Validator: Ignore triples
230 } else {
231 push @prop_attr, $attr;
232 }
233 }
234
235 unless (defined $subject) {
236 $subject = {bnodeid => '## TODO: new bnodeid'};
237 }
238
239 if ($xuri ne $RDF_URI . 'Description') {
240 $self->{ontriple}->(subject => $subject,
241 predicate => {uri => $RDF_URI . 'type'},
242 object => {uri => $xuri},
243 node => $node);
244 }
245
246 if ($rdf_type_attr) {
247 $self->{ontriple}->(subject => $subject,
248 predicate => {uri => $RDF_URI . 'type'},
249 object => {uri => $rdf_type_attr->value}, ## TODO: resolve
250 node => $rdf_type_attr);
251 }
252
253 for my $attr (@prop_attr) {
254 $self->{ontriple}->(subject => $subject,
255 predicate => {uri => $attr->manakai_expanded_uri},
256 object => {value => $attr->value}, ## TODO: language
257 node => $attr);
258 ## TODO: SHOULD in NFC
259 }
260
261 # |propertyEltList|
262
263 my $li_counter = 1;
264 for my $cn (@{$node->child_nodes}) {
265 my $cn_type = $cn->node_type;
266 if ($cn_type == $cn->ELEMENT_NODE) {
267 $self->convert_property_element ($cn, li_counter => \$li_counter,
268 subject => $subject);
269 } elsif ($cn_type == $cn->TEXT_NODE or
270 $cn_type == $cn->CDATA_SECTION_NODE) {
271 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
272 $self->{onerror}->(type => 'character not allowed',
273 level => $self->{grammer_level},
274 node => $cn);
275 }
276 }
277 }
278
279 return $subject;
280 } # convert_node_element
281
282 sub convert_property_element ($$%) {
283 my ($self, $node, %opt) = @_;
284
285 $check_rdf_namespace->($self, $node);
286
287 # |propertyElt|
288
289 my $xuri = $node->manakai_expanded_uri;
290 if ($xuri eq $RDF_URI . 'li') {
291 $xuri = $RDF_URI . '_' . ${$opt{li_counter}}++;
292 }
293
294 if ({
295 %coreSyntaxTerms,
296 $RDF_URI . 'Description' => 1,
297 %oldTerms,
298 }->{$xuri}) {
299 $self->{onerror}->(type => 'element not allowed',
300 level => $self->{grammer_level},
301 node => $node);
302 ## TODO: RDF Validator?
303 }
304
305 my $id_attr;
306 my $dt_attr;
307 my $parse_attr;
308 my $nodeid_attr;
309 my $resource_attr;
310 my @prop_attr;
311 for my $attr (@{$node->attributes}) {
312 my $prefix = $attr->prefix;
313 if (defined $prefix) {
314 next if $prefix =~ /^[Xx][Mm][Ll]/;
315 } else {
316 next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
317 }
318
319 $check_rdf_namespace->($self, $attr);
320
321 my $attr_xuri = $attr->manakai_expanded_uri;
322 if ($attr_xuri eq $RDF_URI . 'ID') {
323 $id_attr = $attr;
324 } elsif ($attr_xuri eq $RDF_URI . 'datatype') {
325 $dt_attr = $attr;
326 } elsif ($attr_xuri eq $RDF_URI . 'parseType') {
327 $parse_attr = $attr;
328 } elsif ($attr_xuri eq $RDF_URI . 'resource') {
329 $resource_attr = $attr;
330 } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
331 $nodeid_attr = $attr;
332 } elsif ({
333 %coreSyntaxTerms,
334 $RDF_URI . 'li' => 1,
335 $RDF_URI . 'Description' => 1,
336 %oldTerms,
337 }->{$attr_xuri}) {
338 $self->{onerror}->(type => 'attribute not allowed',
339 level => $self->{grammer_level},
340 node => $attr);
341 ## TODO: RDF Validator?
342 } else {
343 push @prop_attr, $attr;
344 }
345 }
346
347 my $parse = $parse_attr ? $parse_attr->value : '';
348 if ($parse eq 'Resource') {
349 # |parseTypeResourcePropertyElt|
350
351 for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
352 next unless $attr;
353 $self->{onerror}->(type => 'attribute not allowed',
354 level => $self->{grammer_level},
355 node => $attr);
356 ## TODO: RDF Validator?
357 }
358
359 my $object = {bnodeid => '## TODO: generate bnodeid'};
360 $self->{ontriple}->(subject => $opt{subject},
361 predicate => {uri => $xuri},
362 object => $object,
363 node => $node,
364 id => $id_attr ? {uri => '#' . $id_attr->value} : undef); ## TODO: resolve
365
366 ## As if nodeElement
367
368 # |propertyEltList|
369
370 my $li_counter = 1;
371 for my $cn (@{$node->child_nodes}) {
372 my $cn_type = $cn->node_type;
373 if ($cn_type == $cn->ELEMENT_NODE) {
374 $self->convert_property_element ($cn, li_counter => \$li_counter,
375 subject => $object);
376 } elsif ($cn_type == $cn->TEXT_NODE or
377 $cn_type == $cn->CDATA_SECTION_NODE) {
378 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
379 $self->{onerror}->(type => 'character not allowed',
380 level => $self->{grammer_level},
381 node => $cn);
382 }
383 }
384 }
385 } elsif ($parse eq 'Collection') {
386 # |parseTypeCollectionPropertyElt|
387
388 for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
389 next unless $attr;
390 $self->{onerror}->(type => 'attribute not allowed',
391 level => $self->{grammer_level},
392 node => $attr);
393 ## TODO: RDF Validator?
394 }
395
396 # |nodeElementList|
397 my @resource;
398 for my $cn (@{$node->child_nodes}) {
399 if ($cn->node_type == $cn->ELEMENT_NODE) {
400 push @resource, [$self->convert_node_element ($cn),
401 {bnodeid => '## TODO: bnodeid generated'},
402 $cn];
403 } elsif ($cn->node_type == $cn->TEXT_NODE or
404 $cn->node_type == $cn->CDATA_SECTION_NODE) {
405 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
406 $self->{onerror}->(type => 'character not allowed',
407 level => $self->{grammer_level},
408 node => $cn);
409 }
410 }
411 }
412
413 if (@resource) {
414 $self->{ontriple}->(subject => $opt{subject},
415 predicate => {uri => $xuri},
416 object => $resource[0]->[1],
417 node => $node);
418 } else {
419 $self->{ontriple}->(subject => $opt{subject},
420 predicate => {uri => $xuri},
421 object => {uri => $RDF_URI . 'nil'},
422 node => $node,
423 id => $id_attr ? {uri => '#' . $id_attr->value} : undef); ## TODO: resolve
424 }
425
426 while (@resource) {
427 my $resource = shift @resource;
428 $self->{ontriple}->(subject => $resource->[1],
429 predicate => {uri => $RDF_URI . 'first'},
430 object => $resource->[0],
431 node => $resource->[2]);
432 if (@resource) {
433 $self->{ontriple}->(subject => $resource->[1],
434 predicate => {uri => $RDF_URI . 'rest'},
435 object => $resource[0]->[1],
436 node => $resource->[2]);
437 } else {
438 $self->{ontriple}->(subject => $resource->[1],
439 predicate => {uri => $RDF_URI . 'rest'},
440 object => {uri => $RDF_URI . 'nil'},
441 node => $resource->[2]);
442 }
443 }
444 } elsif ($parse_attr) {
445 # |parseTypeLiteralPropertyElt|
446 # |parseTypeOtherPropertyElt| ## TODO: What RDF Validator does?
447
448 for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
449 next unless $attr;
450 $self->{onerror}->(type => 'attribute not allowed',
451 level => $self->{grammer_level},
452 node => $attr);
453 ## TODO: RDF Validator?
454 }
455
456 my $value = [@{$node->child_nodes}];
457 ## TODO: Callback for validation
458 ## TODO: Serialized form SHOULD be in NFC.
459
460 $self->{ontriple}->(subject => $opt{subject},
461 predicate => {uri => $xuri},
462 object => {nodes => $value,
463 datatype => $RDF_URI . 'XMLLiteral'},
464 node => $node,
465 id => $id_attr ? {uri => '#' . $id_attr->value} : undef); ## TODO: resolve
466 } else {
467 my $mode = 'unknown';
468
469 if ($dt_attr) {
470 $mode = 'literal'; # |literalPropertyElt|
471 ## TODO: What RDF Validator does for |< rdf:datatype><el/></>|?
472 }
473 ## TODO: What RDF Validator does for |< prop-attr><non-empty/></>|?
474
475 my $node_element;
476 my $text = '';
477 for my $cn (@{$node->child_nodes}) {
478 my $cn_type = $cn->node_type;
479 if ($cn_type == $cn->ELEMENT_NODE) {
480 unless ($node_element) {
481 $node_element = $cn;
482 if ({
483 resource => 1, unknown => 1, 'literal-or-resource' => 1,
484 }->{$mode}) {
485 $mode = 'resource';
486 } else {
487 $self->{onerror}->(type => 'element not allowed',
488 level => $self->{grammer_level},
489 node => $cn);
490 ## TODO: RDF Validator?
491 }
492 } else {
493 ## TODO: What RDF Validator does?
494 $self->{onerror}->(type => 'second node element',
495 level => $self->{grammer_level},
496 node => $cn);
497 }
498 } elsif ($cn_type == $cn->TEXT_NODE or
499 $cn_type == $cn->CDATA_SECTION_NODE) {
500 my $data = $cn->data;
501 $text .= $data;
502 if ($data =~ /[^\x09\x0A\x0D\x20]/) {
503 if ({
504 literal => 1, unknown => 1, 'literal-or-resource' => 1,
505 }->{$mode}) {
506 $mode = 'literal';
507 } else {
508 $self->{onerror}->(type => 'character not allowed',
509 level => $self->{grammer_level},
510 node => $cn);
511 ## TODO: RDF Validator?
512 }
513 } else {
514 if ($mode eq 'unknown') {
515 $mode = 'literal-or-resource';
516 } else {
517 #
518 }
519 }
520 }
521 }
522
523 if ($mode eq 'resource') {
524 # |resourcePropertyElt|
525
526 for my $attr (@prop_attr, $resource_attr, $nodeid_attr, $dt_attr) {
527 next unless $attr;
528 $self->{onerror}->(type => 'attribute not allowed',
529 level => $self->{grammer_level},
530 node => $attr);
531 ## TODO: RDF Validator?
532 }
533
534 my $object = $self->convert_node_element ($node_element);
535
536 $self->{ontriple}->(subject => $opt{subject},
537 predicate => {uri => $xuri},
538 object => $object,
539 node => $node,
540 id => $id_attr ? {uri => '#' . $id_attr->value} : undef); ## TODO: resolve
541 } elsif ($mode eq 'literal' or $mode eq 'literal-or-resource') {
542 # |literalPropertyElt|
543
544 for my $attr (@prop_attr, $resource_attr, $nodeid_attr) {
545 next unless $attr;
546 $self->{onerror}->(type => 'attribute not allowed',
547 level => $self->{grammer_level},
548 node => $attr);
549 ## TODO: RDF Validator?
550 }
551
552 ## TODO: $text SHOULD be in NFC
553
554 if ($dt_attr) {
555 $self->{ontriple}->(subject => $opt{subject},
556 predicate => {uri => $xuri},
557 object => {value => $text,
558 datatype => $dt_attr->value},
559 node => $node,
560 id => $id_attr ? {uri => '#' . $id_attr->value} : undef); ## TODO: resolve
561 } else {
562 $self->{ontriple}->(subject => $opt{subject},
563 predicate => {uri => $xuri},
564 object => {value => $text,
565 ## TODO: language
566 },
567 node => $node,
568 id => $id_attr ? {uri => '#' . $id_attr->value} : undef); ## TODO: resolve
569 }
570 } else {
571 ## |emptyPropertyElt|
572
573 for my $attr ($dt_attr) {
574 next unless $attr;
575 $self->{onerror}->(type => 'attribute not allowed',
576 level => $self->{grammer_level},
577 node => $attr);
578 ## TODO: RDF Validator?
579 }
580
581 if (not $resource_attr and not $nodeid_attr and not @prop_attr) {
582 $self->{ontriple}->(subject => $opt{subject},
583 predicate => {uri => $xuri},
584 object => {value => '',
585 ## TODO: language
586 },
587 node => $node,
588 id => $id_attr ? {uri => '#' . $id_attr->value} : undef); ## TODO: resolve
589 } else {
590 my $object;
591 if ($resource_attr) {
592 $object = {uri => $resource_attr->value}; ## TODO: resolve
593 } elsif ($nodeid_attr) {
594 $object = {bnodeid => $nodeid_attr->value};
595 } else {
596 $object = {bnodeid => '## TODO: generated bnodeid'};
597 }
598
599 for my $attr (@prop_attr) {
600 my $attr_xuri = $attr->manakai_expanded_uri;
601 if ($attr_xuri eq $RDF_URI . 'type') {
602 $self->{ontriple}->(subject => $object,
603 predicate => {uri => $attr_xuri},
604 object => $attr->value, ## TODO: resolve
605 node => $attr);
606 } else {
607 ## TODO: SHOULD be in NFC
608 $self->{ontriple}->(subject => $object,
609 predicate => {uri => $attr_xuri},
610 object => {value => $attr->value,
611 ## TODO: lang
612 },
613 node => $attr);
614 }
615 }
616
617 $self->{ontriple}->(subject => $opt{subject},
618 predicate => {uri => $xuri},
619 object => $object,
620 node => $node,
621 id => $id_attr ? {uri => '#' . $id_attr->value} : undef); ## TODO: resolve
622 }
623 }
624 }
625 } # convert_property_element
626
627 1;
628

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24