/[pub]/suikawiki/script/lib/SuikaWiki/Markup/XML.pm
Suika

Contents of /suikawiki/script/lib/SuikaWiki/Markup/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Sun Apr 27 11:45:59 2003 UTC (22 years, 2 months ago) by wakaba
Branch: MAIN
Markup: New.

1 wakaba 1.1
2     =head1 NAME
3    
4     SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator
5    
6     =head1 DESCRIPTION
7    
8     This module can be used to generate the document fragment of XML, SGML or
9     other well-formed (in XML meaning) data formats with the object oriented manner.
10    
11     This module cannot be used to parse XML (or other marked-up) document (or its fragment)
12     by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose
13     of this module is to make it easy for tiny perl scripts to GENERATE well-formed
14     markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-))
15    
16     =cut
17    
18     package SuikaWiki::Markup::XML;
19     use strict;
20     use overload '""' => \&stringify;
21     use Char::Class::XML qw!InXML_NCNameStartChar InXMLNCNameChar!;
22     our %Namespace_URI_to_prefix = (
23     'DAV:' => [qw/dav webdav/],
24     'http://greenbytes.de/2002/rfcedit' => [qw/ed/],
25     'http://icl.com/saxon' => [qw/saxon/],
26     'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/],
27     'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/],
28     'http://purl.org/rss/1.0/' => ['', qw/rss rss10/],
29     'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/],
30     'http://www.mozilla.org/xbl' => ['', qw/xbl/],
31     'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf RDF/],
32     'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/],
33     'http://www.w3.org/1999/xlink' => [qw/l xlink/],
34     'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/],
35     'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/],
36     'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/],
37     'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/],
38     'http://www.w3.org/2000/svg' => ['', qw/s svg/],
39     'http://www.w3.org/2002/06/hlink' => [qw/h hlink/],
40     'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/],
41     'http://www.w3.org/Graphics/SVG/svg-19990412.dtd' => ['', qw/svg/],
42     'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/],
43     'http://xml.apache.org/xalan' => [qw/xalan/],
44     'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/],
45     'urn:schemas-microsoft-com:office:excel' => [qw/x excel/],
46     'urn:schemas-microsoft-com:office:office' => [qw/o office/],
47     'urn:schemas-microsoft-com:office:word' => [qw/w word/],
48     'urn:schemas-microsoft-com:vml' => [qw/v vml/],
49     'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/],
50     'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/],
51     );
52     my %Cache;
53    
54     =head1 METHODS
55    
56     =over 4
57    
58     =item $x = SuikaWiki::Markup::XML->new (%options)
59    
60     Returns new instance of the module. It is itself a node.
61    
62     Available options: C<type> (default: C<#element>), C<local_name>, C<namespace_uri>,
63     C<value>.
64    
65     =cut
66    
67     sub new ($;%) {
68     my $class = shift;
69     my $self = bless {@_}, $class;
70     $self->{type} ||= '#element';
71     $self->{node} ||= [];
72     $self;
73     }
74    
75    
76     =item $x->append_node ($node)
77    
78     Appending given node to the object (as the last child).
79     If the type of given node is C<#fragment>, its all children, not the node
80     itself, are appended.
81    
82     This method returns the appended node unless the type of given node is C<#fragment>.
83     In such cases, this node (C<$x>) is returned.
84    
85     =cut
86    
87     sub append_node ($$) {
88     my $self = shift;
89     my $new_node = shift;
90     die "append_node: Invalid node" unless ref $new_node;
91     if ($new_node->{type} eq '#fragment') {
92     for (@{$new_node->{node}}) {
93     push @{$self->{node}}, $_;
94     $_->{parent} = $self;
95     }
96     $self;
97     } else {
98     push @{$self->{node}}, $new_node;
99     $new_node->{parent} = $self;
100     $new_node;
101     }
102     }
103    
104     =item $new_node = $x->append_new_node (%options)
105    
106     Appending a new node. The new node is returned.
107    
108     Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>.
109    
110     =cut
111    
112     sub append_new_node ($;%) {
113     my $self = shift;
114     my %o = @_;
115     my $new_node = __PACKAGE__->new (%o);
116     push @{$self->{node}}, $new_node;
117     $new_node->{parent} = $self;
118     $new_node;
119     }
120    
121     =item $new_node = $x->append_text ($text)
122    
123     Appending given text as a new text node. The new text node is returned.
124    
125     =cut
126    
127     sub append_text ($$;%) {
128     my $self = shift;
129     my $s = shift;
130     #if (@{$self->{node}}[-1]->{type} eq '#text') {
131     # $self->{node}}[-1]->append_new_node (type => '#text', value => $s);
132     #} else {
133     $self->append_new_node (type => '#text', value => $s);
134     #}
135     }
136    
137     =item $attr_node = $x->get_attribute ($local_name, %options)
138    
139     Returns the attribute node whose local-name is C<$local_name>.
140    
141     Available options: C<namespace_uri>, C<make_new_node>.
142    
143     =cut
144    
145     sub get_attribute ($$;%) {
146     my $self = shift;
147     my ($name, %o) = @_;
148     for (@{$self->{node}}) {
149     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
150     return $_;
151     }
152     }
153     ## Node is not exist
154     if ($o{make_new_node}) {
155     return $self->append_new_node (type => '#attribute', local_name => $name, namespace_uri => $o{namespace_uri});
156     } else {
157     return undef;
158     }
159     }
160    
161     =item $attr_node = $x->set_attribute ($local_name => $value, %options)
162    
163     Set the value of the attribute. The attribute node is returned.
164    
165     Available options: C<namespace_uri>.
166    
167     =cut
168    
169     sub set_attribute ($$$;%) {
170     my $self = shift;
171     my ($name, $val, %o) = @_;
172     for (@{$self->{node}}) {
173     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
174     $_->{value} = $val;
175     $_->{node} = [];
176     return $_;
177     }
178     }
179     return $self->append_new_node (type => '#attribute', local_name => $name, value => $val, namespace_uri => $o{namespace_uri});
180     }
181    
182     =item \@children = $x->child_nodes
183    
184     Returns an array reference to child nodes.
185    
186     =item $local_name = $x->local_name
187    
188     Returns the local-name.
189    
190     =item $type = $x->node_type
191    
192     Returns the node type.
193    
194     =item $node = $x->parent_node
195    
196     Returns the parent node. If there is no parent node, undef is returned.
197    
198     =cut
199    
200     sub child_nodes ($) { shift->{node} }
201     sub local_name ($) { shift->{local_name} }
202     sub node_type ($) { shift->{type} }
203     sub parent_node ($) { shift->{parent} }
204    
205     =item $i = $x->count
206    
207     Returns the number of child nodes.
208    
209     =cut
210    
211     # TODO: support counting by type
212     sub count ($;@) {
213     scalar @{shift->{node}};
214     }
215    
216     # $prefix = $x->_get_namespace_prefix ($namespace_uri)
217     sub _get_namespace_prefix ($$;%) {
218     my ($self, $uri) = (shift, shift);
219     my %o = @_;
220     if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) {
221     return $p if $self->_prefix_to_uri ($p) eq $uri;
222     } if ($Namespace_URI_to_prefix{$uri}) {
223     for (@{$Namespace_URI_to_prefix{$uri}}) {
224     my $pfx = $_; $pfx .= ':' if $pfx;
225     if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) {
226     return $self->_uri_to_prefix ($uri => $pfx, %o);
227     }
228     }
229     } else {
230     my ($u_r_i, $pfx) = ($uri);
231     while ($u_r_i =~ s/([A-Za-z][0-9A-Za-z-]+)[^0-9A-Za-z-]*$//) {
232     my $p_f_x = $1 . ':';
233     next if uc (substr ($p_f_x, 0, 3)) eq 'XML';
234     unless ($self->_prefix_to_uri ($p_f_x)) {
235     $pfx = $p_f_x;
236     last;
237     }
238     }
239     if ($pfx) {
240     return $self->_uri_to_prefix ($uri => $pfx, %o);
241     } else {
242     while (1) {
243     my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':';
244     unless ($self->_prefix_to_uri ($pfx)) {
245     return $self->_uri_to_prefix ($uri => $pfx, %o);
246     }
247     }
248     }
249     }
250     }
251    
252     # $uri or undef = $x->_prefix_to_uri ($prefix => [$new_uri])
253     sub _prefix_to_uri ($$;$%) {
254     my ($self, $prefix, $new_uri, %o) = @_;
255     return undef unless $self->_check_namespace_prefix ($prefix);
256     if ($new_uri) {
257     $self->{ns}->{$prefix} = $new_uri;
258     }
259     if (uc (substr $prefix, 0, 3) eq 'XML') {
260     return 'http://www.w3.org/XML/1998/namespace' if $prefix eq 'xml:';
261     return 'http://www.w3.org/2000/xmlns/' if $prefix eq 'xmlns:';
262     } if (defined $self->{ns}->{$prefix}) {
263     $self->{ns}->{$prefix};
264     } elsif (ref $self->{parent}) {
265     shift; # $self
266     $self->{parent}->_prefix_to_uri (@_);
267     } else {
268     undef;
269     }
270     }
271    
272     # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options)
273     # use_no_prefix (default: 1): Allow default namespace (no prefix).
274     sub _uri_to_prefix ($$;$%) {
275     my ($self, $uri, $new_prefix, %o) = @_;
276     if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) {
277     $self->{ns}->{$new_prefix} = $uri;
278     $new_prefix;
279     } else {
280     return 'xml:' if $uri eq 'http://www.w3.org/XML/1998/namespace';
281     return 'xmlns:' if $uri eq 'http://www.w3.org/2000/xmlns/';
282     for (keys %{$self->{ns}||{}}) {
283     next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix});
284     return $_ if $self->{ns}->{$_} eq $uri;
285     }
286     if (ref $self->{parent}) {
287     shift; # $self
288     $self->{parent}->_uri_to_prefix (@_);
289     } else {
290     undef;
291     }
292     }
293     }
294    
295     =item $x->define_new_namespace ($prefix => $uri)
296    
297     Defines a new XML Namespace. This method is useful for root or section-level
298     element node.
299    
300     Returned value is unspecified in this version of this module.
301    
302     =cut
303    
304     sub define_new_namespace ($$$) {
305     my ($self, $prefix, $uri) = @_;
306     if ($prefix eq '' || $self->_check_ncname ($prefix)) {
307     $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':';
308     $self->_prefix_to_uri ($prefix => $uri);
309     } else {
310     undef;
311     }
312     }
313    
314     =item $qname = $x->qname
315    
316     Returns QName ((namespace-)qualified name) of the element type.
317     Undef is retuened when the type does not have its QName
318     (ie. when type is neither C<#element> or C<#attribute>).
319    
320     =cut
321    
322     sub qname ($) {
323     my $self = shift;
324     if ($self->_check_ncname ($self->{local_name})) {
325     if ($self->{type} eq '#element') {
326     $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name}
327     unless $self->{_qname};
328     return $self->{_qname};
329     } elsif ($self->{type} eq '#attribute') {
330     return $self->attribute_name;
331     }
332     }
333     undef;
334     }
335    
336     =item $tag = $x->start_tag
337    
338     Returns the start tag (or something that marks the start of something, such as '<!-- '
339     for C<#comment> nodes).
340    
341     =cut
342    
343     sub start_tag ($) {
344     my $self = shift;
345     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
346     my $r = '<';
347     $r .= $self->qname;
348     for (@{$self->{node}}) {
349     $r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute';
350     }
351     for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) {
352     if ($prefix) {
353     $r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1);
354     } else {
355     $r .= ' xmlns';
356     }
357     $r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"';
358     }
359     $r .= '>';
360     $r;
361     } elsif ($self->{type} eq '#comment') {
362     '<!-- ';
363     } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
364     '<?' . $self->{local_name};
365     } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
366     my $r = '<!' . $self->{local_name} . ' ';
367     if ($self->{local_name} eq 'DOCTYPE' && ref $self->{parent}) {
368     my $qname;
369     for (@{$self->{parent}->{node}}) {
370     if ($_->{type} eq '#element') {
371     $qname = $_->qname;
372     last if $qname;
373     }
374     }
375     $r .= ($qname ? $qname : '#IMPLIED') . ' ';
376     $r;
377     }
378     } else {
379     '';
380     }
381     }
382    
383     =item $tag = $x->end_tag
384    
385     Returns the end tag (or something that marks the end of something, such as ' -->'
386     for C<#comment> nodes).
387    
388     =cut
389    
390     sub end_tag ($) {
391     my $self = shift;
392     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
393     '</' . $self->qname . '>';
394     } elsif ($self->{type} eq '#comment') {
395     ' -->';
396     } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
397     '?>';
398     } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
399     my $r = '';
400     $r .= '>';
401     } else {
402     '';
403     }
404     }
405    
406     =item $tag = $x->attribute_name
407    
408     Returns the attribute name.
409    
410     =cut
411    
412     sub attribute_name ($) {
413     my $self = shift;
414     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
415     ($self->{namespace_uri} ?
416     (ref $self->{parent} ? $self->{parent} : $self)
417     ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '')
418     .$self->{local_name};
419     } elsif ($self->{type} eq '#pair' && $self->_check_ncname ($self->{local_name})) {
420     $self->{local_name};
421     } else {
422     '';
423     }
424     }
425    
426     =item $tag = $x->attribute_value
427    
428     Returns the attribute value.
429    
430     =cut
431    
432     sub attribute_value ($) {
433     my $self = shift;
434     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
435     '"' . $self->_entitize ($self->inner_text) . '"';
436     } elsif ($self->{type} eq '#pair' && $self->_check_ncname ($self->{local_name})) {
437     my $t = $self->inner_text;
438     if ($t =~ /"/) {
439     if ($t =~ /'/) {
440     $t =~ s/"/&quot;/g;
441     return '"' . $t . '"';
442     } else {
443     return "'" . $t . "'";
444     }
445     } else {
446     return '"' . $t . '"';
447     }
448     } else {
449     '';
450     }
451     }
452    
453     =item $tag = $x->attribute
454    
455     Returns the attribute (name and value pair).
456    
457     =cut
458    
459     sub attribute ($) {
460     my $self = shift;
461     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
462     $self->attribute_name . '=' . $self->attribute_value;
463     } elsif ($self->{type} eq '#pair' && $self->_check_ncname ($self->{local_name})) {
464     $self->attribute_name . ' ' . $self->attribute_value;
465     } else {
466     '';
467     }
468     }
469    
470     =item $tag = $x->inner_xml
471    
472     Returns the content of the node in XML syntax. (In case of the C<#element> nodes,
473     element content without start- and end-tags is returned.)
474    
475     Note that for not all node types the behavior of this method is defined.
476     For example, returned value of C<#attribute> might be unexpected one
477     in this version of this module.
478    
479     =cut
480    
481     sub inner_xml ($) {
482     my $self = shift;
483     my $r = '';
484     if ($self->{type} eq '#comment') {
485     $r = $self->inner_text;
486     $r =~ s/--/-&#x45;/g;
487     } elsif ($self->{type} eq '#pi') {
488     if (length $self->{value}) {
489     $r = ' ' . $self->{value};
490     $r =~ s/\?>/?&gt;/g;
491     }
492     for (@{$self->{node}}) {
493     if ($_->node_type eq '#attribute') {
494     $r .= ' ' . $_->attribute;
495     } else {
496     my $s = $_->inner_text;
497     $s =~ s/\?>/?&gt;/g;
498     $r .= ' ' . $s if length $s;
499     }
500     }
501     } elsif ($self->{type} eq '#declaration') {
502     my ($isub, $pubid, $sysid) = ('');
503     for (@{$self->{node}}) {
504     if ($_->{type} eq '#pair') {
505     if ($_->{local_name} eq 'PUBLIC') {
506     $pubid = $_;
507     } elsif ($_->{local_name} eq 'SYSTEM') {
508     $sysid = $_;
509     }
510     } else {
511     $isub .= $_->outer_xml;
512     }
513     }
514     if ($sysid) {
515     if ($pubid) {
516     $r = $pubid->attribute . ' ' . $sysid->attribute_value;
517     } else {
518     $r = $sysid->attribute;
519     }
520     if ($isub) {
521     $r .= " [\n" . $isub . "\n]";
522     }
523     } else {
524     if ($isub) {
525     $r = "[\n" . $isub . "\n]";
526     } else {
527     $r = "[]";
528     }
529     }
530     } else {
531     if ($self->{type} ne '#xml') {
532     $r = $self->_entitize ($self->{value});
533     } else {
534     $r = $self->{value};
535     }
536     for (@{$self->{node}}) {
537     my $nt = $_->node_type;
538     if ((0||$self->{option}->{indent}) && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) {
539     $r .= "\n";
540     }
541     $r .= $_->outer_xml unless $_->node_type eq '#attribute';
542     }
543     }
544     $r;
545     }
546    
547    
548     =item $tag = $x->outer_xml
549    
550     Returns the node in XML syntax.
551    
552     =cut
553    
554     sub outer_xml ($) {
555     my $self = shift;
556     if ($self->{type} eq '#attribute') {
557     $self->attribute;
558     } else {
559     if ((0||$self->{option}->{indent}) && $self->{type} eq '#element') {
560     my $r = $self->start_tag;
561     my $c = $self->inner_xml;
562     if ($c) {
563     $c =~ s/\n/\n /g;
564     $r .= "\n " . $c . "\n";
565     }
566     $r .= $self->end_tag;
567     $r;
568     } else {
569     $self->start_tag . $self->inner_xml . $self->end_tag;
570     }
571     }
572     }
573    
574     =item $tag = $x->inner_text
575    
576     Returns the text content of the node. (In many case the returned value is same
577     as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's.
578     But some classes that inherits this module might implement to return other
579     value (eg. to return the value of the alt attribute of html:img element).
580    
581     =cut
582    
583     sub inner_text ($) {
584     my $self = shift;
585     my $r = $self->{value};
586     for (@{$self->{node}}) {
587     $r .= $_->inner_text unless $_->node_type eq '#attribute';
588     }
589     $r;
590     }
591    
592     {no warnings; # prototype mismatch
593     *stringify = \&outer_xml;
594     }
595    
596     # $s = $x->_entitize ($s)
597     sub _entitize ($$) {
598     my ($self, $s) = (shift, shift);
599     $s =~ s/&/&amp;/g;
600     $s =~ s/</&lt;/g;
601     $s =~ s/>/&gt;/g;
602     $s =~ s/"/&quot;/g;
603     $s;
604     }
605    
606     # 1/0 = $x->_check_ncname ($s)
607     sub _check_ncname ($$) {
608     my $self = shift;
609     my $s = shift;
610     return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s};
611     if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) {
612     # \p{...}('*'/'+'/'{n,}') does not work...
613     $Cache{ncname}->{$s} = 1;
614     1;
615     } else {
616     $Cache{ncname}->{$s} = 0;
617     0;
618     }
619     }
620    
621     # 1/0 = $x->_check_namespace_prefix ($s)
622     sub _check_namespace_prefix ($$) {
623     my $self = shift;
624     my $s = shift;
625     return 0 unless defined $s;
626     return 1 if $s eq '';
627     substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':';
628     $self->_check_ncname ($s);
629     }
630    
631     =back
632    
633     =head1 LICENSE
634    
635     Copyright 2003 Wakaba <w@suika.fam.cx>
636    
637     This program is free software; you can redistribute it and/or
638     modify it under the same terms as Perl itself.
639    
640     =cut
641    
642     1; # $Date: 2003/04/03 01:08:17 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24