/[suikacvs]/markup/html/whatpm/Whatpm/RDFXML.pm
Suika

Contents of /markup/html/whatpm/Whatpm/RDFXML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations) (download)
Fri Mar 21 08:58:35 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +50 -19 lines
++ whatpm/Whatpm/ChangeLog	21 Mar 2008 08:58:05 -0000
	* RDFXML.pm: s/id/ID/ for attribute name.
	The |node| arguments are added for |ontriple| calls.
	Too many "attribute not allowed" errors were raised.

	* ContentChecker.pm: Initial experimental support for rdf:RDF
	element.

2008-03-21  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::RDFXML;
2 use strict;
3
4 ## NOTE: EntityReference nodes are not supported except that
5 ## unexpected entity reference are simply ignored. (In fact
6 ## all EntityRefernce nodes are ignored.)
7
8 ## TODO: Add a callback function invoked for every element
9 ## when XMLCC is implemented in WDCC.
10
11 ## ISSUE: <html:nest/> in RDF subtree?
12
13 ## ISSUE: PIs in RDF subtree should be validated?
14
15 my $RDF_URI = q<http://www.w3.org/1999/02/22-rdf-syntax-ns#>;
16
17 sub new ($) {
18 my $self = bless {fact_level => 'm', grammer_level => 'm'}, shift;
19 $self->{onerror} = sub {
20 my %opt = @_;
21 warn $opt{type}, "\n";
22 };
23 $self->{ontriple} = sub {
24 my %opt = @_;
25 my $dump_resource = sub {
26 my $resource = shift;
27 if ($resource->{uri}) {
28 return '<' . $resource->{uri} . '>';
29 } elsif ($resource->{bnodeid}) {
30 return '_:' . $resource->{bnodeid};
31 } elsif ($resource->{nodes}) {
32 return '"' . join ('', map {$_->inner_html} @{$resource->{nodes}}) .
33 '"^^<' . $resource->{datatype} . '>';
34 } elsif (defined $resource->{value}) {
35 return '"' . $resource->{value} . '"' .
36 (defined $resource->{datatype}
37 ? '^^<' . $resource->{datatype} . '>'
38 : '@' . $resource->{language});
39 } else {
40 return '??';
41 }
42 };
43 print STDERR $dump_resource->($opt{subject}) . ' ';
44 print STDERR $dump_resource->($opt{predicate}) . ' ';
45 print STDERR $dump_resource->($opt{object}) . "\n";
46 };
47 return $self;
48 } # new
49
50 sub convert_document ($$) {
51 my $self = shift;
52 my $node = shift; # Document
53
54 ## ISSUE: An RDF/XML document, either |doc| or |nodeElement|
55 ## is allowed as a starting production. However, |nodeElement|
56 ## is not a Root Event.
57
58 my $has_element;
59
60 for my $cn (@{$node->child_nodes}) {
61 if ($cn->node_type == $cn->ELEMENT_NODE) {
62 unless ($has_element) {
63 if ($cn->manakai_expanded_uri eq $RDF_URI . q<RDF>) {
64 $self->convert_rdf_element ($cn);
65 } else {
66 $self->convert_rdf_node_element ($cn);
67 }
68 $has_element = 1;
69 } else {
70 $self->{onerror}->(type => 'second node element',
71 level => $self->{grammer_level},
72 node => $cn);
73 }
74 } elsif ($cn->node_type == $cn->TEXT_NODE or
75 $cn->node_type == $cn->CDATA_SECTION_NODE) {
76 $self->{onerror}->(type => 'character not allowed',
77 level => $self->{grammer_level},
78 node => $cn);
79 }
80 }
81 } # convert_document
82
83 my $check_rdf_namespace = sub {
84 my $self = shift;
85 my $node = shift;
86 my $node_nsuri = $node->namespace_uri;
87 return unless defined $node_nsuri;
88 if (substr ($node_nsuri, 0, length $RDF_URI) eq $RDF_URI and
89 length $RDF_URI < length $node_nsuri) {
90 $self->{onerror}->(type => 'bad rdf namespace',
91 level => $self->{fact_level}, # Section 5.1
92 node => $node);
93 }
94 }; # $check_rdf_namespace
95
96 sub convert_rdf_element ($$) {
97 my ($self, $node) = @_;
98
99 $check_rdf_namespace->($self, $node);
100
101 # |RDF|
102
103 for my $attr (@{$node->attributes}) {
104 my $prefix = $attr->prefix;
105 if (defined $prefix) {
106 next if $prefix =~ /^[Xx][Mm][Ll]/;
107 } else {
108 next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
109 }
110
111 $check_rdf_namespace->($self, $attr);
112 $self->{onerror}->(type => 'attribute not allowed',
113 level => $self->{grammer_level},
114 node => $attr);
115 }
116
117 # |nodeElementList|
118 for my $cn (@{$node->child_nodes}) {
119 if ($cn->node_type == $cn->ELEMENT_NODE) {
120 $self->convert_node_element ($cn);
121 } elsif ($cn->node_type == $cn->TEXT_NODE or
122 $cn->node_type == $cn->CDATA_SECTION_NODE) {
123 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
124 $self->{onerror}->(type => 'character not allowed',
125 level => $self->{grammer_level},
126 node => $cn);
127 }
128 }
129 }
130 } # convert_rdf_element
131
132 my %coreSyntaxTerms = (
133 $RDF_URI . 'RDF' => 1,
134 $RDF_URI . 'ID' => 1,
135 $RDF_URI . 'about' => 1,
136 $RDF_URI . 'parseType' => 1,
137 $RDF_URI . 'resource' => 1,
138 $RDF_URI . 'nodeID' => 1,
139 $RDF_URI . 'datatype' => 1,
140 );
141
142 my %oldTerms = (
143 $RDF_URI . 'aboutEach' => 1,
144 $RDF_URI . 'aboutEachPrefix' => 1,
145 $RDF_URI . 'bagID' => 1,
146 );
147
148 sub convert_node_element ($$) {
149 my ($self, $node) = @_;
150
151 $check_rdf_namespace->($self, $node);
152
153 # |nodeElement|
154
155 my $xuri = $node->manakai_expanded_uri;
156
157 if ({
158 %coreSyntaxTerms,
159 $RDF_URI . 'li' => 1,
160 %oldTerms,
161 }->{$xuri}) {
162 $self->{onerror}->(type => 'element not allowed',
163 level => $self->{grammer_level},
164 node => $node);
165
166 ## TODO: W3C RDF Validator: Continue validation, but triples that would
167 ## be generated from the subtree are ignored.
168 }
169
170 my $subject;
171 my $rdf_type_attr;
172 my @prop_attr;
173
174 for my $attr (@{$node->attributes}) {
175 my $prefix = $attr->prefix;
176 if (defined $prefix) {
177 next if $prefix =~ /^[Xx][Mm][Ll]/;
178 } else {
179 next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
180 }
181
182 $check_rdf_namespace->($self, $attr);
183
184 my $attr_xuri = $attr->manakai_expanded_uri;
185 if ($attr_xuri eq $RDF_URI . 'ID') {
186 unless (defined $subject) {
187 $subject = {uri => '#' . $attr->value}; ## TODO: resolve()
188 } else {
189 ## TODO: Ignore triple as W3C RDF Validator does
190 }
191 } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
192 unless (defined $subject) {
193 $subject = {bnodeid => '## TODO: bnode: ' . $attr->value};
194 } else {
195 ## TODO: Ignore triple as W3C RDF Validator does
196 }
197 } elsif ($attr_xuri eq $RDF_URI . 'about') {
198 unless (defined $subject) {
199 $subject = {uri => $attr->value}; ## TODO: resolve
200 } else {
201 ## TODO: Ignore triple as W3C RDF Validator does
202 }
203 } elsif ($attr_xuri eq $RDF_URI . 'type') {
204 $rdf_type_attr = $attr;
205 } elsif ({
206 %coreSyntaxTerms,
207 $RDF_URI . 'li' => 1,
208 $RDF_URI . 'Description' => 1,
209 %oldTerms,
210 }->{$attr_xuri}) {
211 $self->{onerror}->(type => 'attribute not allowed',
212 level => $self->{grammer_level},
213 node => $attr);
214
215 ## TODO: W3C RDF Validator: Ignore triples
216 } else {
217 push @prop_attr, $attr;
218 }
219 }
220
221 unless (defined $subject) {
222 $subject = {bnodeid => '## TODO: new bnodeid'};
223 }
224
225 if ($xuri ne $RDF_URI . 'Description') {
226 $self->{ontriple}->(subject => $subject,
227 predicate => {uri => $RDF_URI . 'type'},
228 object => {uri => $xuri},
229 node => $node);
230 }
231
232 if ($rdf_type_attr) {
233 $self->{ontriple}->(subject => $subject,
234 predicate => {uri => $RDF_URI . 'type'},
235 object => {uri => $rdf_type_attr->value}, ## TODO: resolve
236 node => $rdf_type_attr);
237 }
238
239 for my $attr (@prop_attr) {
240 $self->{ontriple}->(subject => $subject,
241 predicate => {uri => $attr->manakai_expanded_uri},
242 object => {value => $attr->value}, ## TODO: language
243 node => $attr);
244 ## TODO: SHOULD in NFC
245 }
246
247 # |propertyEltList|
248
249 my $li_counter = 1;
250 for my $cn (@{$node->child_nodes}) {
251 my $cn_type = $cn->node_type;
252 if ($cn_type == $cn->ELEMENT_NODE) {
253 $self->convert_property_element ($cn, li_counter => \$li_counter,
254 subject => $subject);
255 } elsif ($cn_type == $cn->TEXT_NODE or
256 $cn_type == $cn->CDATA_SECTION_NODE) {
257 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
258 $self->{onerror}->(type => 'character not allowed',
259 level => $self->{grammer_level},
260 node => $cn);
261 }
262 }
263 }
264
265 return $subject;
266 } # convert_node_element
267
268 sub convert_property_element ($$%) {
269 my ($self, $node, %opt) = @_;
270
271 $check_rdf_namespace->($self, $node);
272
273 # |propertyElt|
274
275 my $xuri = $node->manakai_expanded_uri;
276 if ($xuri eq $RDF_URI . 'li') {
277 $xuri = $RDF_URI . '_' . ${$opt{li_counter}}++;
278 }
279
280 if ({
281 %coreSyntaxTerms,
282 $RDF_URI . 'Description' => 1,
283 %oldTerms,
284 }->{$xuri}) {
285 $self->{onerror}->(type => 'element not allowed',
286 level => $self->{grammer_level},
287 node => $node);
288 ## TODO: RDF Validator?
289 }
290
291 my $id_attr;
292 my $dt_attr;
293 my $parse_attr;
294 my $nodeid_attr;
295 my $resource_attr;
296 my @prop_attr;
297 for my $attr (@{$node->attributes}) {
298 my $prefix = $attr->prefix;
299 if (defined $prefix) {
300 next if $prefix =~ /^[Xx][Mm][Ll]/;
301 } else {
302 next if $attr->manakai_local_name =~ /^[Xx][Mm][Ll]/;
303 }
304
305 $check_rdf_namespace->($self, $attr);
306
307 my $attr_xuri = $attr->manakai_expanded_uri;
308 if ($attr_xuri eq $RDF_URI . 'ID') {
309 $id_attr = $attr;
310 } elsif ($attr_xuri eq $RDF_URI . 'datatype') {
311 $dt_attr = $attr;
312 } elsif ($attr_xuri eq $RDF_URI . 'parseType') {
313 $parse_attr = $attr;
314 } elsif ($attr_xuri eq $RDF_URI . 'resource') {
315 $resource_attr = $attr;
316 } elsif ($attr_xuri eq $RDF_URI . 'nodeID') {
317 $nodeid_attr = $attr;
318 } elsif ({
319 %coreSyntaxTerms,
320 $RDF_URI . 'li' => 1,
321 $RDF_URI . 'Description' => 1,
322 %oldTerms,
323 }->{$attr_xuri}) {
324 $self->{onerror}->(type => 'attribute not allowed',
325 level => $self->{grammer_level},
326 node => $attr);
327 ## TODO: RDF Validator?
328 } else {
329 push @prop_attr, $attr;
330 }
331 }
332
333 my $parse = $parse_attr ? $parse_attr->value : '';
334 if ($parse eq 'Resource') {
335 # |parseTypeResourcePropertyElt|
336
337 for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
338 next unless $attr;
339 $self->{onerror}->(type => 'attribute not allowed',
340 level => $self->{grammer_level},
341 node => $attr);
342 ## TODO: RDF Validator?
343 }
344
345 my $object = {bnodeid => '## TODO: generate bnodeid'};
346 $self->{ontriple}->(subject => $opt{subject},
347 predicate => {uri => $xuri},
348 object => $object,
349 node => $node);
350 ## TODO: reification
351
352 ## As if nodeElement
353
354 # |propertyEltList|
355
356 my $li_counter = 1;
357 for my $cn (@{$node->child_nodes}) {
358 my $cn_type = $cn->node_type;
359 if ($cn_type == $cn->ELEMENT_NODE) {
360 $self->convert_property_element ($cn, li_counter => \$li_counter,
361 subject => $object);
362 } elsif ($cn_type == $cn->TEXT_NODE or
363 $cn_type == $cn->CDATA_SECTION_NODE) {
364 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
365 $self->{onerror}->(type => 'character not allowed',
366 level => $self->{grammer_level},
367 node => $cn);
368 }
369 }
370 }
371 } elsif ($parse eq 'Collection') {
372 # |parseTypeCollectionPropertyElt|
373
374 for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
375 next unless $attr;
376 $self->{onerror}->(type => 'attribute not allowed',
377 level => $self->{grammer_level},
378 node => $attr);
379 ## TODO: RDF Validator?
380 }
381
382 # |nodeElementList|
383 my @resource;
384 for my $cn (@{$node->child_nodes}) {
385 if ($cn->node_type == $cn->ELEMENT_NODE) {
386 push @resource, [$self->convert_node_element ($cn),
387 {bnodeid => '## TODO: bnodeid generated'},
388 $cn];
389 } elsif ($cn->node_type == $cn->TEXT_NODE or
390 $cn->node_type == $cn->CDATA_SECTION_NODE) {
391 if ($cn->data =~ /[^\x09\x0A\x0D\x20]/) {
392 $self->{onerror}->(type => 'character not allowed',
393 level => $self->{grammer_level},
394 node => $cn);
395 }
396 }
397 }
398
399 if (@resource) {
400 $self->{ontriple}->(subject => $opt{subject},
401 predicate => {uri => $xuri},
402 object => $resource[0]->[1],
403 node => $node);
404 } else {
405 $self->{ontriple}->(subject => $opt{subject},
406 predicate => {uri => $xuri},
407 object => {uri => $RDF_URI . 'nil'},
408 node => $node);
409 }
410 ## TODO: reification
411
412 while (@resource) {
413 my $resource = shift @resource;
414 $self->{ontriple}->(subject => $resource->[1],
415 predicate => {uri => $RDF_URI . 'first'},
416 object => $resource->[0],
417 node => $resource->[2]);
418 if (@resource) {
419 $self->{ontriple}->(subject => $resource->[1],
420 predicate => {uri => $RDF_URI . 'rest'},
421 object => $resource[0]->[1],
422 node => $resource->[2]);
423 } else {
424 $self->{ontriple}->(subject => $resource->[1],
425 predicate => {uri => $RDF_URI . 'rest'},
426 object => {uri => $RDF_URI . 'nil'},
427 node => $resource->[2]);
428 }
429 }
430 } elsif ($parse_attr) {
431 # |parseTypeLiteralPropertyElt|
432 # |parseTypeOtherPropertyElt| ## TODO: What RDF Validator does?
433
434 for my $attr ($resource_attr, $nodeid_attr, $dt_attr) {
435 next unless $attr;
436 $self->{onerror}->(type => 'attribute not allowed',
437 level => $self->{grammer_level},
438 node => $attr);
439 ## TODO: RDF Validator?
440 }
441
442 my $value = [@{$node->child_nodes}];
443 ## TODO: Callback for validation
444 ## TODO: Serialized form SHOULD be in NFC.
445
446 $self->{ontriple}->(subject => $opt{subject},
447 predicate => {uri => $xuri},
448 object => {nodes => $value,
449 datatype => $RDF_URI . 'XMLLiteral'},
450 node => $node);
451 ## TODO: reification
452 } else {
453 my $mode = 'unknown';
454
455 if ($dt_attr) {
456 $mode = 'literal'; # |literalPropertyElt|
457 ## TODO: What RDF Validator does for |< rdf:datatype><el/></>|?
458 }
459 ## TODO: What RDF Validator does for |< prop-attr><non-empty/></>|?
460
461 my $node_element;
462 my $text = '';
463 for my $cn (@{$node->child_nodes}) {
464 my $cn_type = $cn->node_type;
465 if ($cn_type == $cn->ELEMENT_NODE) {
466 unless ($node_element) {
467 $node_element = $cn;
468 if ({
469 resource => 1, unknown => 1, 'literal-or-resource' => 1,
470 }->{$mode}) {
471 $mode = 'resource';
472 } else {
473 $self->{onerror}->(type => 'element not allowed',
474 level => $self->{grammer_level},
475 node => $cn);
476 ## TODO: RDF Validator?
477 }
478 } else {
479 ## TODO: What RDF Validator does?
480 $self->{onerror}->(type => 'second node element',
481 level => $self->{grammer_level},
482 node => $cn);
483 }
484 } elsif ($cn_type == $cn->TEXT_NODE or
485 $cn_type == $cn->CDATA_SECTION_NODE) {
486 my $data = $cn->data;
487 $text .= $data;
488 if ($data =~ /[^\x09\x0A\x0D\x20]/) {
489 if ({
490 literal => 1, unknown => 1, 'literal-or-resource' => 1,
491 }->{$mode}) {
492 $mode = 'literal';
493 } else {
494 $self->{onerror}->(type => 'character not allowed',
495 level => $self->{grammer_level},
496 node => $cn);
497 ## TODO: RDF Validator?
498 }
499 } else {
500 if ($mode eq 'unknown') {
501 $mode = 'literal-or-resource';
502 } else {
503 #
504 }
505 }
506 }
507 }
508
509 if ($mode eq 'resource') {
510 # |resourcePropertyElt|
511
512 for my $attr (@prop_attr, $resource_attr, $nodeid_attr, $dt_attr) {
513 next unless $attr;
514 $self->{onerror}->(type => 'attribute not allowed',
515 level => $self->{grammer_level},
516 node => $attr);
517 ## TODO: RDF Validator?
518 }
519
520 my $object = $self->convert_node_element ($node_element);
521
522 $self->{ontriple}->(subject => $opt{subject},
523 predicate => {uri => $xuri},
524 object => $object,
525 node => $node);
526
527 ## TODO: reification
528 } elsif ($mode eq 'literal' or $mode eq 'literal-or-resource') {
529 # |literalPropertyElt|
530
531 for my $attr (@prop_attr, $resource_attr, $nodeid_attr) {
532 next unless $attr;
533 $self->{onerror}->(type => 'attribute not allowed',
534 level => $self->{grammer_level},
535 node => $attr);
536 ## TODO: RDF Validator?
537 }
538
539 ## TODO: $text SHOULD be in NFC
540
541 if ($dt_attr) {
542 $self->{ontriple}->(subject => $opt{subject},
543 predicate => {uri => $xuri},
544 object => {value => $text,
545 datatype => $dt_attr->value},
546 node => $node);
547 } else {
548 $self->{ontriple}->(subject => $opt{subject},
549 predicate => {uri => $xuri},
550 object => {value => $text,
551 ## TODO: language
552 },
553 node => $node);
554 }
555
556 ## TODO: reification
557 } else {
558 ## |emptyPropertyElt|
559
560 for my $attr ($dt_attr) {
561 next unless $attr;
562 $self->{onerror}->(type => 'attribute not allowed',
563 level => $self->{grammer_level},
564 node => $attr);
565 ## TODO: RDF Validator?
566 }
567
568 if (not $resource_attr and not $nodeid_attr and not @prop_attr) {
569 $self->{ontriple}->(subject => $opt{subject},
570 predicate => {uri => $xuri},
571 object => {value => '',
572 ## TODO: language
573 },
574 node => $node);
575
576 ## TODO: reification
577 } else {
578 my $object;
579 if ($resource_attr) {
580 $object = {uri => $resource_attr->value}; ## TODO: resolve
581 } elsif ($nodeid_attr) {
582 $object = {bnodeid => $nodeid_attr->value};
583 } else {
584 $object = {bnodeid => '## TODO: generated bnodeid'};
585 }
586
587 for my $attr (@prop_attr) {
588 my $attr_xuri = $attr->manakai_expanded_uri;
589 if ($attr_xuri eq $RDF_URI . 'type') {
590 $self->{ontriple}->(subject => $object,
591 predicate => {uri => $attr_xuri},
592 object => $attr->value, ## TODO: resolve
593 node => $attr);
594 } else {
595 ## TODO: SHOULD be in NFC
596 $self->{ontriple}->(subject => $object,
597 predicate => {uri => $attr_xuri},
598 object => {value => $attr->value,
599 ## TODO: lang
600 },
601 node => $attr);
602 }
603 }
604
605 $self->{ontriple}->(subject => $opt{subject},
606 predicate => {uri => $xuri},
607 object => $object,
608 node => $node);
609
610 ## TODO: reification
611 }
612 }
613 }
614 } # convert_property_element
615
616 1;
617

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24