/[pub]/suikawiki/script/lib/SuikaWiki/Markup/XML.pm
Suika

Contents of /suikawiki/script/lib/SuikaWiki/Markup/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations) (download)
Sun May 25 10:55:14 2003 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.4: +53 -4 lines
Bug fixes

1 wakaba 1.1
2     =head1 NAME
3    
4     SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator
5    
6     =head1 DESCRIPTION
7    
8     This module can be used to generate the document fragment of XML, SGML or
9     other well-formed (in XML meaning) data formats with the object oriented manner.
10    
11     This module cannot be used to parse XML (or other marked-up) document (or its fragment)
12     by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose
13     of this module is to make it easy for tiny perl scripts to GENERATE well-formed
14     markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-))
15    
16     =cut
17    
18     package SuikaWiki::Markup::XML;
19     use strict;
20 wakaba 1.5 our $VERSION = do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
21 wakaba 1.2 use overload '""' => \&stringify,
22     fallback => 1;
23     use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!;
24 wakaba 1.1 our %Namespace_URI_to_prefix = (
25     'DAV:' => [qw/dav webdav/],
26     'http://greenbytes.de/2002/rfcedit' => [qw/ed/],
27     'http://icl.com/saxon' => [qw/saxon/],
28     'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/],
29     'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/],
30     'http://purl.org/rss/1.0/' => ['', qw/rss rss10/],
31     'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/],
32     'http://www.mozilla.org/xbl' => ['', qw/xbl/],
33 wakaba 1.3 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/],
34 wakaba 1.1 'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/],
35     'http://www.w3.org/1999/xlink' => [qw/l xlink/],
36     'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/],
37     'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/],
38     'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/],
39     'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/],
40     'http://www.w3.org/2000/svg' => ['', qw/s svg/],
41     'http://www.w3.org/2002/06/hlink' => [qw/h hlink/],
42     'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/],
43 wakaba 1.3 'http://www.w3.org/2002/07/owl' => [qw/owl/],
44 wakaba 1.1 'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/],
45 wakaba 1.3 'http://www.wapforum.org/2001/wml' => [qw/wap/],
46 wakaba 1.1 'http://xml.apache.org/xalan' => [qw/xalan/],
47     'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/],
48     'urn:schemas-microsoft-com:vml' => [qw/v vml/],
49     'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/],
50     'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/],
51     );
52     my %Cache;
53 wakaba 1.4 my $NS_SGML = 'urn:x-suika-fam-cx:markup:sgml:';
54 wakaba 1.1
55     =head1 METHODS
56    
57     =over 4
58    
59     =item $x = SuikaWiki::Markup::XML->new (%options)
60    
61     Returns new instance of the module. It is itself a node.
62    
63 wakaba 1.4 Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri> and C<value>.
64 wakaba 1.1
65     =cut
66    
67     sub new ($;%) {
68     my $class = shift;
69     my $self = bless {@_}, $class;
70     $self->{type} ||= '#element';
71 wakaba 1.5 if ($self->{qname}) {
72     ($self->{namespace_prefix}, $self->{local_name}) = $self->_ns_parse_qname ($self->{qname});
73     $self->{_qname} = $self->{qname};
74     }
75     if (defined $self->{namespace_prefix}) {
76     $self->{namespace_prefix} .= ':' if $self->{namespace_prefix}
77     && substr ($self->{namespace_prefix}, -1) ne ':';
78     $self->{ns}->{$self->{namespace_prefix}||''} = $self->{namespace_uri} if defined $self->{namespace_uri};
79     }
80 wakaba 1.4 for (qw/local_name value/) {
81 wakaba 1.2 if (ref $self->{$_}) {
82     $self->{$_}->{parent} = $self;
83     }
84     }
85 wakaba 1.1 $self->{node} ||= [];
86     $self;
87     }
88    
89 wakaba 1.5 sub _ns_parse_qname ($$) {
90     shift;
91     my $qname = shift;
92     if ($qname =~ /:/) {
93     return split /:/, $qname, 2;
94     } else {
95     return (undef, $qname);
96     }
97     }
98 wakaba 1.1
99     =item $x->append_node ($node)
100    
101     Appending given node to the object (as the last child).
102     If the type of given node is C<#fragment>, its all children, not the node
103     itself, are appended.
104    
105     This method returns the appended node unless the type of given node is C<#fragment>.
106     In such cases, this node (C<$x>) is returned.
107    
108 wakaba 1.2 Available options: C<node_or_text>.
109    
110 wakaba 1.1 =cut
111    
112 wakaba 1.2 sub append_node ($$;%) {
113 wakaba 1.1 my $self = shift;
114 wakaba 1.2 my ($new_node, %o) = @_;
115     unless (ref $new_node) {
116     if ($o{node_or_text}) {
117     return $self->append_text ($new_node);
118     } else {
119     die "append_node: Invalid node" unless ref $new_node;
120     }
121     }
122 wakaba 1.1 if ($new_node->{type} eq '#fragment') {
123     for (@{$new_node->{node}}) {
124     push @{$self->{node}}, $_;
125     $_->{parent} = $self;
126     }
127     $self;
128     } else {
129     push @{$self->{node}}, $new_node;
130     $new_node->{parent} = $self;
131     $new_node;
132     }
133     }
134    
135     =item $new_node = $x->append_new_node (%options)
136    
137     Appending a new node. The new node is returned.
138    
139     Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>.
140    
141     =cut
142    
143     sub append_new_node ($;%) {
144     my $self = shift;
145     my %o = @_;
146     my $new_node = __PACKAGE__->new (%o);
147     push @{$self->{node}}, $new_node;
148     $new_node->{parent} = $self;
149     $new_node;
150     }
151    
152     =item $new_node = $x->append_text ($text)
153    
154     Appending given text as a new text node. The new text node is returned.
155    
156     =cut
157    
158     sub append_text ($$;%) {
159     my $self = shift;
160     my $s = shift;
161     #if (@{$self->{node}}[-1]->{type} eq '#text') {
162     # $self->{node}}[-1]->append_new_node (type => '#text', value => $s);
163     #} else {
164     $self->append_new_node (type => '#text', value => $s);
165     #}
166     }
167    
168 wakaba 1.2 sub append_baretext ($$;%) {
169     my $self = shift;
170     my $s = shift;
171     $self->append_new_node (type => '#xml', value => $s);
172     }
173    
174 wakaba 1.1 =item $attr_node = $x->get_attribute ($local_name, %options)
175    
176     Returns the attribute node whose local-name is C<$local_name>.
177    
178     Available options: C<namespace_uri>, C<make_new_node>.
179    
180     =cut
181    
182     sub get_attribute ($$;%) {
183     my $self = shift;
184     my ($name, %o) = @_;
185     for (@{$self->{node}}) {
186     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
187     return $_;
188     }
189     }
190     ## Node is not exist
191     if ($o{make_new_node}) {
192     return $self->append_new_node (type => '#attribute', local_name => $name, namespace_uri => $o{namespace_uri});
193     } else {
194     return undef;
195     }
196     }
197    
198     =item $attr_node = $x->set_attribute ($local_name => $value, %options)
199    
200     Set the value of the attribute. The attribute node is returned.
201    
202     Available options: C<namespace_uri>.
203    
204     =cut
205    
206     sub set_attribute ($$$;%) {
207     my $self = shift;
208     my ($name, $val, %o) = @_;
209     for (@{$self->{node}}) {
210     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
211     $_->{value} = $val;
212     $_->{node} = [];
213     return $_;
214     }
215     }
216     return $self->append_new_node (type => '#attribute', local_name => $name, value => $val, namespace_uri => $o{namespace_uri});
217     }
218    
219     =item \@children = $x->child_nodes
220    
221     Returns an array reference to child nodes.
222    
223 wakaba 1.4 =item $local_name = $x->local_name ([$new_name])
224 wakaba 1.1
225 wakaba 1.4 Returns or set the local-name.
226    
227     =item $uri = $x->namespace_uri ([$new_uri])
228    
229     Returns or set namespace name (URI) of the element or the attribute
230 wakaba 1.1
231 wakaba 1.5 =item $uri = $x->namespace_prefix ([$new_prefix])
232    
233     Returns or set namespace prefix of the element or the attribute.
234     You may give C<$new_prefix> in form either 'foo' or 'foo:'.
235     To indicate "default" prefix, use '' (length == 0 string).
236    
237 wakaba 1.1 =item $type = $x->node_type
238    
239     Returns the node type.
240    
241     =item $node = $x->parent_node
242    
243     Returns the parent node. If there is no parent node, undef is returned.
244    
245     =cut
246    
247     sub child_nodes ($) { shift->{node} }
248 wakaba 1.4 sub local_name ($;$) {
249     my ($self, $newname) = @_;
250     if ($newname) {
251     $self->{local_name} = $newname;
252     }
253     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
254     $self->{local_name}->{local_name};
255     } else {
256     $self->{local_name}
257     }
258     }
259 wakaba 1.1 sub node_type ($) { shift->{type} }
260     sub parent_node ($) { shift->{parent} }
261    
262 wakaba 1.4 ## TODO: obsolete
263     sub target_name ($;$) {
264     my ($self, $new) = @_;
265     if (defined $new) {
266     $self->{target_name} = $new;
267     }
268     $self->{target_name};
269     }
270    
271     sub namespace_uri ($;$) {
272     my ($self, $new_uri) = @_;
273     if (defined $new_uri) {
274     $self->{namespace_uri} = $new_uri;
275     }
276     $self->{namespace_uri};
277     }
278 wakaba 1.5 sub namespace_prefix ($;$) {
279     my ($self, $new_pfx) = @_;
280     if (defined $new_pfx && $self->{namespace_uri}) {
281     $new_pfx .= ':' if $new_pfx;
282     $self->{namespace_prefix} = $new_pfx;
283     $self->{ns}->{$new_pfx} = $self->{namespace_uri};
284     }
285     $self->_get_namespace_prefix ($self->{namespace_uri});
286     }
287 wakaba 1.4
288 wakaba 1.1 =item $i = $x->count
289    
290     Returns the number of child nodes.
291    
292     =cut
293    
294     # TODO: support counting by type
295     sub count ($;@) {
296 wakaba 1.2 my $self = shift;
297     (defined $self->{value} ? 1 : 0) + scalar @{$self->{node}};
298 wakaba 1.1 }
299    
300     # $prefix = $x->_get_namespace_prefix ($namespace_uri)
301     sub _get_namespace_prefix ($$;%) {
302     my ($self, $uri) = (shift, shift);
303     my %o = @_;
304     if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) {
305     return $p if $self->_prefix_to_uri ($p) eq $uri;
306     } if ($Namespace_URI_to_prefix{$uri}) {
307     for (@{$Namespace_URI_to_prefix{$uri}}) {
308     my $pfx = $_; $pfx .= ':' if $pfx;
309     if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) {
310     return $self->_uri_to_prefix ($uri => $pfx, %o);
311     }
312     }
313     } else {
314     my ($u_r_i, $pfx) = ($uri);
315 wakaba 1.4 $u_r_i =~ s/[^0-9A-Za-z._-]+/ /g;
316     my @u_r_i = split / /, $u_r_i;
317     for (reverse @u_r_i) {
318     if (s/([A-Za-z][0-9A-Za-z._-]+)//) {
319     my $p_f_x = $1 . ':';
320     next if lc (substr ($p_f_x, 0, 3)) eq 'xml';
321     unless ($self->_prefix_to_uri ($p_f_x)) {
322     $pfx = $p_f_x;
323     last;
324     }
325 wakaba 1.1 }
326     }
327     if ($pfx) {
328     return $self->_uri_to_prefix ($uri => $pfx, %o);
329     } else {
330     while (1) {
331     my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':';
332     unless ($self->_prefix_to_uri ($pfx)) {
333     return $self->_uri_to_prefix ($uri => $pfx, %o);
334     }
335     }
336     }
337     }
338     }
339    
340 wakaba 1.4 sub _set_prefix_to_uri ($$$;%) {
341     my ($self, $prefix => $uri, %o) = @_;
342     return undef unless $self->_check_namespace_prefix ($prefix);
343     $self->{ns}->{$prefix} = $uri;
344     $self->_prefix_to_uri ($prefix);
345     }
346    
347     ## TODO: removing ns declare (1.1) support
348     # $uri or undef = $x->_prefix_to_uri ($prefix)
349 wakaba 1.1 sub _prefix_to_uri ($$;$%) {
350 wakaba 1.4 my ($self, $prefix, %o) = @_;
351 wakaba 1.1 return undef unless $self->_check_namespace_prefix ($prefix);
352     if (uc (substr $prefix, 0, 3) eq 'XML') {
353     return 'http://www.w3.org/XML/1998/namespace' if $prefix eq 'xml:';
354     return 'http://www.w3.org/2000/xmlns/' if $prefix eq 'xmlns:';
355 wakaba 1.2 }
356     if (defined $self->{ns}->{$prefix}) {
357 wakaba 1.1 $self->{ns}->{$prefix};
358     } elsif (ref $self->{parent}) {
359     shift; # $self
360     $self->{parent}->_prefix_to_uri (@_);
361     } else {
362     undef;
363     }
364     }
365    
366     # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options)
367     # use_no_prefix (default: 1): Allow default namespace (no prefix).
368     sub _uri_to_prefix ($$;$%) {
369     my ($self, $uri, $new_prefix, %o) = @_;
370     if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) {
371     $self->{ns}->{$new_prefix} = $uri;
372     $new_prefix;
373     } else {
374     return 'xml:' if $uri eq 'http://www.w3.org/XML/1998/namespace';
375     return 'xmlns:' if $uri eq 'http://www.w3.org/2000/xmlns/';
376     for (keys %{$self->{ns}||{}}) {
377     next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix});
378     return $_ if $self->{ns}->{$_} eq $uri;
379     }
380 wakaba 1.2 if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') {
381 wakaba 1.1 shift; # $self
382     $self->{parent}->_uri_to_prefix (@_);
383     } else {
384     undef;
385     }
386     }
387     }
388    
389     =item $x->define_new_namespace ($prefix => $uri)
390    
391     Defines a new XML Namespace. This method is useful for root or section-level
392     element node.
393    
394     Returned value is unspecified in this version of this module.
395    
396     =cut
397    
398 wakaba 1.4 ## TODO: structured URI (such as http://&server;/) support
399 wakaba 1.1 sub define_new_namespace ($$$) {
400     my ($self, $prefix, $uri) = @_;
401     if ($prefix eq '' || $self->_check_ncname ($prefix)) {
402     $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':';
403 wakaba 1.4 $self->_set_prefix_to_uri ($prefix => $uri);
404 wakaba 1.1 } else {
405     undef;
406     }
407     }
408    
409 wakaba 1.4 =item $uri = $x->defined_namespace_prefix ($prefix)
410    
411     Query whether the namespace prefix is defined or not.
412     If defined, return namespace name (URI).
413    
414     =cut
415    
416     sub defined_namespace_prefix ($$) {
417     my ($self, $prefix) = @_;
418     $prefix .= ':' if $prefix;#Carp::carp join ",", ';',@_;
419     $self->_prefix_to_uri ($prefix);
420     }
421    
422 wakaba 1.1 =item $qname = $x->qname
423    
424     Returns QName ((namespace-)qualified name) of the element type.
425     Undef is retuened when the type does not have its QName
426     (ie. when type is neither C<#element> or C<#attribute>).
427    
428     =cut
429    
430     sub qname ($) {
431     my $self = shift;
432     if ($self->_check_ncname ($self->{local_name})) {
433     if ($self->{type} eq '#element') {
434     $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name}
435     unless $self->{_qname};
436     return $self->{_qname};
437     } elsif ($self->{type} eq '#attribute') {
438     return $self->attribute_name;
439     }
440     }
441     undef;
442     }
443    
444     =item $tag = $x->start_tag
445    
446 wakaba 1.4 Returns the start tag (or something that marks the start of something, such as '<!--'
447 wakaba 1.1 for C<#comment> nodes).
448    
449     =cut
450    
451     sub start_tag ($) {
452     my $self = shift;
453     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
454     my $r = '<';
455     $r .= $self->qname;
456     for (@{$self->{node}}) {
457     $r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute';
458     }
459     for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) {
460     if ($prefix) {
461     $r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1);
462     } else {
463     $r .= ' xmlns';
464     }
465     $r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"';
466     }
467     $r .= '>';
468     $r;
469     } elsif ($self->{type} eq '#comment') {
470 wakaba 1.4 '<!--';
471     } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
472     '<?' . ($self->{local_name});
473 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
474     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') {
475     '&#x';
476     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') {
477     '&#';
478 wakaba 1.4 } elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
479     if ($self->{local_name}->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
480 wakaba 1.2 '%';
481     } else {
482     '&';
483     }
484 wakaba 1.4 } elsif ($self->_check_ncname ($self->{local_name})) {
485 wakaba 1.2 if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
486     '%';
487     } else {
488     '&';
489     }
490     } else { # error
491     '';
492     }
493 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
494     '<!' . {
495     $NS_SGML.'attlist' => 'ATTLIST',
496     $NS_SGML.'doctype' => 'DOCTYPE',
497     $NS_SGML.'element' => 'ELEMENT',
498     $NS_SGML.'entity' => 'ENTITY',
499     $NS_SGML.'entity:parameter' => 'ENTITY %',
500     $NS_SGML.'notation' => 'NOTATION',
501     $NS_SGML.'sgml' => 'SGML',
502     }->{$self->{namespace_uri}} . ' ';
503 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
504     my $r = '<!' . $self->{local_name} . ' ';
505     if ($self->{local_name} eq 'DOCTYPE' && ref $self->{parent}) {
506     my $qname;
507     for (@{$self->{parent}->{node}}) {
508     if ($_->{type} eq '#element') {
509     $qname = $_->qname;
510     last if $qname;
511     }
512     }
513     $r .= ($qname ? $qname : '#IMPLIED') . ' ';
514 wakaba 1.2 }
515     $r;
516     } elsif ($self->{type} eq '#section') {
517     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#reference') {
518     '<![' . $self->{local_name} . '[';
519     } elsif ($self->_check_ncname ($self->{local_name})) {
520     '<![' . $self->{local_name} . '[';
521     } else { # error
522     '';
523 wakaba 1.1 }
524     } else {
525     '';
526     }
527     }
528    
529     =item $tag = $x->end_tag
530    
531 wakaba 1.4 Returns the end tag (or something that marks the end of something, such as '-->'
532 wakaba 1.1 for C<#comment> nodes).
533    
534     =cut
535    
536     sub end_tag ($) {
537     my $self = shift;
538     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
539     '</' . $self->qname . '>';
540     } elsif ($self->{type} eq '#comment') {
541 wakaba 1.4 '-->';
542 wakaba 1.1 } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
543     '?>';
544 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
545     ';';
546 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
547     '>';
548 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
549 wakaba 1.4 '>';
550 wakaba 1.2 } elsif ($self->{type} eq '#section') {
551     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#reference') {
552     ']]>';
553     } elsif ($self->_check_ncname ($self->{local_name})) {
554     ']]>';
555     } else { # error
556     '';
557     }
558 wakaba 1.1 } else {
559     '';
560     }
561     }
562    
563     =item $tag = $x->attribute_name
564    
565     Returns the attribute name.
566    
567     =cut
568    
569     sub attribute_name ($) {
570     my $self = shift;
571     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
572     ($self->{namespace_uri} ?
573     (ref $self->{parent} ? $self->{parent} : $self)
574     ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '')
575     .$self->{local_name};
576     } else {
577     '';
578     }
579     }
580    
581     =item $tag = $x->attribute_value
582    
583     Returns the attribute value.
584    
585     =cut
586    
587 wakaba 1.4 sub attribute_value ($;%) {
588 wakaba 1.1 my $self = shift;
589 wakaba 1.4 my %o = @_;
590 wakaba 1.1 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
591 wakaba 1.5 my $r = '"';
592     if (ref ($self->{value}) && $self->{value}->_CLASS_NAME eq $self->_CLASS_NAME) {
593     unshift @{$self->{node}}, $self->{value};
594     undef $self->{value};
595     } else {
596     $r .= $self->_entitize ($self->{value});
597     }
598 wakaba 1.4 for (@{$self->{node}}) {
599     my $nt = $_->node_type;
600     if ($nt eq '#reference' || $nt eq '#xml') {
601     $r .= $_->outer_xml;
602     } elsif ($nt ne '#attribute') {
603     $r .= $self->_entitize ($_->inner_text, percent => $o{escape_percent});
604     }
605     }
606     return $r . '"';
607     } else {
608     '';
609     }
610     }
611    
612     sub entity_value ($;%) {
613     my $self = shift;
614     my %o = @_;
615     my $_entitize = sub {
616     my $s = shift;
617     $s =~ s/&/&amp;/g;
618     $s =~ s/%/&#x25;/g;
619     $s =~ s/"/&quot;/g;
620     $s;
621     };
622     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
623     my $r = '"' . &$_entitize ($self->{value});
624     for (@{$self->{node}}) {
625     my $nt = $_->node_type;
626     if ($nt eq '#reference' || $nt eq '#xml') {
627     $r .= $_->outer_xml;
628     } elsif ($nt ne '#attribute') {
629     $r .= &$_entitize ($_->inner_text);
630 wakaba 1.1 }
631     }
632 wakaba 1.4 return $r . '"';
633 wakaba 1.1 } else {
634     '';
635     }
636     }
637    
638     =item $tag = $x->attribute
639    
640     Returns the attribute (name and value pair).
641    
642     =cut
643    
644     sub attribute ($) {
645     my $self = shift;
646     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
647     $self->attribute_name . '=' . $self->attribute_value;
648     } else {
649     '';
650     }
651     }
652    
653 wakaba 1.2 sub external_id ($;%) {
654     my $self = shift;
655     my %o = @_;
656     my ($pubid, $sysid, $ndata);
657     for (@{$self->{node}}) {
658 wakaba 1.4 if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) {
659 wakaba 1.2 if ($_->{local_name} eq 'PUBLIC') {
660 wakaba 1.4 $pubid = $_->inner_text;
661 wakaba 1.2 } elsif ($_->{local_name} eq 'SYSTEM') {
662 wakaba 1.4 $sysid = $_->inner_text;
663 wakaba 1.2 } elsif ($_->{local_name} eq 'NDATA') {
664 wakaba 1.4 $ndata = $_->inner_text;
665     undef $ndata unless $self->_check_ncname ($ndata);
666 wakaba 1.2 }
667     }
668     }
669     my $r = '';
670 wakaba 1.4 if (length $pubid) {
671     $pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges;
672     }
673     if (length $sysid) {
674     if ($sysid =~ /"/) {
675     if ($sysid =~ /'/) {
676     $sysid =~ s/"/&quot;/; $sysid = '"' . $sysid . '"';
677     } else {
678     $sysid = "'" . $sysid . "'";
679     }
680     } else {
681     $sysid = '"' . $sysid . '"';
682     }
683     }
684 wakaba 1.2 if ($pubid && $sysid) {
685 wakaba 1.4 $r = 'PUBLIC "' . $pubid . '" ' . $sysid;
686 wakaba 1.2 } elsif ($sysid) {
687 wakaba 1.4 $r = 'SYSTEM ' . $sysid;
688 wakaba 1.2 } elsif ($pubid && $o{allow_pubid_only}) {
689 wakaba 1.4 $r = 'PUBLIC "' . $pubid . '"';
690 wakaba 1.2 }
691     if ($r && $ndata && $o{use_ndata}) {
692 wakaba 1.4 $r .= ' NDATA ' . $ndata;
693 wakaba 1.2 }
694     $r;
695     }
696    
697     =item $s = $x->content_spec
698    
699     Generates contentspec of element type declaration (ex. C<(E1 | E2 | E3)>)
700     or AttDef of attribute declaration (ex. C<name CDATA #REQUIRED>).
701    
702     =cut
703    
704     sub content_spec ($) {
705     my $self = shift;
706     if ($self->{type} eq '#element') {
707     my $text = 0;
708     my $contentspec = join ' | ', map {$_->qname} grep {$text = 1 if $_->{type} eq '#text'; $_->{type} eq '#element'} @{$self->{node}};
709     $contentspec = '#PCDATA' . ($contentspec ? ' | ' . $contentspec : '') if $text;
710    
711     return $contentspec ? '(' . $contentspec . ')' : 'EMPTY';
712     } elsif ($self->{type} eq '#attribute') {
713     my $attdef = $self->qname . "\t" . ($self->{data_type} || 'CDATA') . "\t";
714     my $default = $self->{default_decl};
715     $default .= ' ' . $self->attribute_value if $default eq '#FIXED';
716     unless ($default) {
717     $default = defined $self->{value} ? $self->attribute_value : '#IMPLIED';
718     }
719     return $attdef . $default;
720     }
721     }
722    
723 wakaba 1.1 =item $tag = $x->inner_xml
724    
725     Returns the content of the node in XML syntax. (In case of the C<#element> nodes,
726     element content without start- and end-tags is returned.)
727    
728     Note that for not all node types the behavior of this method is defined.
729     For example, returned value of C<#attribute> might be unexpected one
730     in this version of this module.
731    
732     =cut
733    
734 wakaba 1.3 sub inner_xml ($;%) {
735 wakaba 1.1 my $self = shift;
736 wakaba 1.3 my %o = @_;
737 wakaba 1.1 my $r = '';
738     if ($self->{type} eq '#comment') {
739     $r = $self->inner_text;
740     $r =~ s/--/-&#x45;/g;
741     } elsif ($self->{type} eq '#pi') {
742     if (length $self->{value}) {
743     $r = ' ' . $self->{value};
744 wakaba 1.2 $r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i.
745 wakaba 1.1 }
746     for (@{$self->{node}}) {
747     if ($_->node_type eq '#attribute') {
748     $r .= ' ' . $_->attribute;
749     } else {
750     my $s = $_->inner_text;
751     $s =~ s/\?>/?&gt;/g;
752     $r .= ' ' . $s if length $s;
753     }
754     }
755 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
756     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') {
757     $r = sprintf '%02X', $self->{value};
758     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') {
759     $r = sprintf '%02d', $self->{value};
760 wakaba 1.4 } elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
761     $r = $self->{local_name}->{local_name};
762     } elsif ($self->_check_ncname ($self->{local_name})) {
763     $r = ($self->{local_name});
764 wakaba 1.2 } else { # error
765     $r = '';
766     }
767 wakaba 1.1 } elsif ($self->{type} eq '#declaration') {
768 wakaba 1.2 if ($self->{local_name} eq 'DOCTYPE') {
769     my ($isub, $xid) = ('', $self->external_id);
770     for (@{$self->{node}}) {
771 wakaba 1.4 $isub .= $_->outer_xml if $_->{type} ne '#attribute';
772 wakaba 1.2 }
773     if ($xid) {
774     $r = $xid;
775     if ($isub) {
776     $r .= " [\n" . $isub . "]";
777 wakaba 1.1 }
778     } else {
779 wakaba 1.2 if ($isub) {
780     $r = "[\n" . $isub . "]";
781     } else {
782     $r = "[]";
783     }
784 wakaba 1.1 }
785 wakaba 1.4 } elsif ($self->{namespace_uri} eq $NS_SGML.'doctype') {
786     my $root = $self->get_attribute ('qname');
787     ref $root ? ($root = $root->inner_text) : (ref $self->{parent} ? (do {
788     for (@{$self->{parent}->{node}}) {
789     if ($_->{type} eq '#element') {
790     $root = $_->qname;
791     last if $root;
792     }
793     }
794     $root = '#IMPLIED';
795     }) : ($root = '#IMPLIED')); ## error!
796     my ($isub, $xid) = ('', $self->external_id);
797     for (@{$self->{node}}) {
798     $isub .= $_->outer_xml if $_->{type} ne '#attribute';
799 wakaba 1.2 }
800     if ($xid) {
801 wakaba 1.4 $r = $xid;
802     if ($isub) {
803     $r .= " [\n" . $isub . "]";
804     }
805     } else {
806     if ($isub) {
807     $r = "[\n" . $isub . "]";
808     } else {
809     $r = "[]";
810     }
811     }
812     $r = $root . ' ' . $r;
813     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity'
814     || $self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter'
815     || $self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:notation') {
816     my %xid_opt;
817     $r = $self->{local_name} . ' ' if $self->_check_ncname ($self->{local_name});
818     if ($self->{namespace_uri} eq $NS_SGML.'entity:parameter') {
819     #$r = '% ' . $r;
820     } elsif ($self->{namespace_uri} eq $NS_SGML.'entity') {
821     $xid_opt{use_ndata} = 1;
822     } elsif ($self->{namespace_uri} eq $NS_SGML.'notation') {
823     $xid_opt{allow_pubid_only} = 1;
824     }
825    
826     my ($v, $xid) = ($self->{value}, $self->external_id (%xid_opt));
827     if ($xid) { ## External ID
828 wakaba 1.2 $r .= $xid;
829 wakaba 1.4 } else { ## EntityValue
830     my $entity_value = $self->get_attribute ('value');
831     if ($entity_value) { # <!ENTITY foo "bar">
832     $r .= $entity_value->entity_value;
833     } else { ## Parameter entity reference
834     $r .= $self->{value};
835     for (@{$self->{node}}) {
836     $r .= $_->outer_xml;
837     }
838 wakaba 1.2 }
839 wakaba 1.1 }
840 wakaba 1.4 } elsif ($self->{namespace_uri} eq $NS_SGML.'element') {
841     my $qname = $self->get_attribute ('qname');
842     ref $qname ? $qname = $qname->inner_text : undef;
843     if ($qname && $self->_check_name ($qname)) {
844     ## Element type name is defined
845     $r = $qname . ' ';
846 wakaba 1.3 if ($o{output_tag_omit_declaration}) {
847 wakaba 1.4 $r .= ($self->{flag}->{element_tag_start_omitable} ? 'o ' : '- ')
848     .
849     ($self->{flag}->{element_tag_end_omitable} ? 'o ' : '- ');
850 wakaba 1.3 }
851 wakaba 1.4 my $rs = $self->inner_text (output_ref_as_is => 1);
852     $r .= $rs || 'ANY';
853 wakaba 1.2 } else {
854 wakaba 1.4 #if (ref $self->{node}->[0] && $self->{node}->[0]->{type} eq '#element') {
855     ### Element prototype is given
856     # $r = $self->{node}->[0]->qname . ' ' . &$to . $self->{node}->[0]->content_spec;
857     #} elsif () {
858     ### Element type name and contentspec is given
859     # $r = $self->{target_name} . ' ' . &$to . ($self->inner_text || 'ANY');
860     #} else {
861 wakaba 1.2 ## (Element type name and contetnspac) is given
862 wakaba 1.4 $r .= $self->inner_text (output_ref_as_is => 1);
863     # || 'Name ANY'; # error
864     #}
865 wakaba 1.2 }
866     } elsif ($self->{local_name} eq 'ATTLIST') {
867     if ($self->_check_name ($self->{target_name})) {
868     $r = $self->{target_name};
869     }
870     my $t = $self->inner_text (output_ref_as_is => 1);
871     $r .= "\n\t" . $t if $t;
872     $r ||= 'Name'; # error!
873     for (@{$self->{node}}) {
874     if ($_->{type} eq '#attribute') {
875     $r .= "\n\t" . $_->content_spec;
876     }
877 wakaba 1.1 }
878 wakaba 1.2 } else { # unknown
879 wakaba 1.3 for (@{$self->{node}}) {
880     $r .= $_->outer_xml;
881     }
882 wakaba 1.1 }
883 wakaba 1.2 } elsif ($self->{type} eq '#section' && !ref $self->{local_name} && $self->{local_name} eq 'CDATA') {
884     $r = $self->inner_text;
885     $r =~ s/]]>/]]>]]<![CDATA[>/g;
886 wakaba 1.1 } else {
887     if ($self->{type} ne '#xml') {
888     $r = $self->_entitize ($self->{value});
889     } else {
890     $r = $self->{value};
891     }
892     for (@{$self->{node}}) {
893     my $nt = $_->node_type;
894 wakaba 1.4 if (($self->{option}->{indent}) && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) {
895 wakaba 1.1 $r .= "\n";
896     }
897     $r .= $_->outer_xml unless $_->node_type eq '#attribute';
898     }
899     }
900     $r;
901     }
902    
903    
904     =item $tag = $x->outer_xml
905    
906     Returns the node in XML syntax.
907    
908     =cut
909    
910     sub outer_xml ($) {
911     my $self = shift;
912     if ($self->{type} eq '#attribute') {
913     $self->attribute;
914     } else {
915 wakaba 1.4 if ($self->{option}->{indent} && $self->{type} eq '#element') {
916 wakaba 1.1 my $r = $self->start_tag;
917     my $c = $self->inner_xml;
918 wakaba 1.4 if (!length $c && $self->{option}->{use_EmptyElemTag}) {
919     substr ($r, -1) = ' />';
920     } else {
921     if ($c) {
922     $c =~ s/\n/\n /g;
923     $r .= "\n " . $c . "\n";
924     }
925     $r .= $self->end_tag;
926 wakaba 1.1 }
927     $r;
928     } else {
929 wakaba 1.4 my $r = $self->start_tag;
930     my $c = $self->inner_xml;
931     if ($self->{type} eq '#element' && !length $c && $self->{option}->{use_EmptyElemTag}) {
932     substr ($r, -1) = ' />';
933     } else {
934     $r .= $c . $self->end_tag;
935     #$r .= "\n" if $self->{type} eq '#declaration';
936     }
937 wakaba 1.2 $r;
938     #'{'.$self->{type}.': '.$r.'}'; # for debug
939 wakaba 1.1 }
940     }
941     }
942    
943     =item $tag = $x->inner_text
944    
945     Returns the text content of the node. (In many case the returned value is same
946     as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's.
947     But some classes that inherits this module might implement to return other
948     value (eg. to return the value of the alt attribute of html:img element).
949    
950 wakaba 1.2 Available options: C<output_ref_as_is>.
951    
952 wakaba 1.1 =cut
953    
954 wakaba 1.2 sub inner_text ($;%) {
955 wakaba 1.1 my $self = shift;
956 wakaba 1.2 my %o = @_;
957 wakaba 1.5 my $r = '';
958     if (ref ($self->{value}) && $self->{value}->_CLASS_NAME eq $self->_CLASS_NAME) {
959     unshift @{$self->{node}}, $self->{value};
960     undef $self->{value};
961     } else {
962     $r = $self->{value};
963     }
964 wakaba 1.2 if ($o{output_ref_as_is}) { ## output as if RCDATA
965 wakaba 1.4 $r =~ s/&/&amp;/g;
966     for my $node (@{$self->{node}}) {
967     my $nt = $node->node_type;
968     if ($nt eq '#reference' || $nt eq '#xml') {
969     $r .= $node->outer_xml;
970 wakaba 1.2 } elsif ($nt ne '#attribute') {
971 wakaba 1.4 $r .= map {s/&/&amp;/g; $_} $node->inner_text;
972 wakaba 1.2 }
973     }
974     } else {
975     for (@{$self->{node}}) {
976     $r .= $_->inner_text unless $_->node_type eq '#attribute';
977     }
978 wakaba 1.1 }
979     $r;
980     }
981    
982     {no warnings; # prototype mismatch
983     *stringify = \&outer_xml;
984     }
985    
986 wakaba 1.5 sub _CLASS_NAME ($) {
987     'SuikaWiki::Markup::XML';
988     }
989    
990 wakaba 1.1 # $s = $x->_entitize ($s)
991 wakaba 1.4 sub _entitize ($$;%) {
992     my ($self, $s, %o) = (shift, shift, @_);
993 wakaba 1.1 $s =~ s/&/&amp;/g;
994     $s =~ s/</&lt;/g;
995     $s =~ s/>/&gt;/g;
996     $s =~ s/"/&quot;/g;
997 wakaba 1.4 $s =~ s/%/&#x25;/g if $o{percent};
998     $s =~ s/'/&#x27;/g if $o{apos};
999 wakaba 1.1 $s;
1000     }
1001    
1002 wakaba 1.2 # 1/0 = $x->_check_name ($s)
1003     sub _check_name ($$) {
1004     my $self = shift;
1005     my $s = shift;
1006     return $Cache{name}->{$s} if defined $Cache{name}->{$s};
1007     if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) {
1008     # \p{...}('*'/'+'/'{n,}') does not work...
1009     $Cache{name}->{$s} = 1;
1010     1;
1011     } else {
1012     $Cache{name}->{$s} = 0;
1013     0;
1014     }
1015     }
1016 wakaba 1.1 # 1/0 = $x->_check_ncname ($s)
1017     sub _check_ncname ($$) {
1018     my $self = shift;
1019     my $s = shift;
1020     return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s};
1021     if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) {
1022     # \p{...}('*'/'+'/'{n,}') does not work...
1023     $Cache{ncname}->{$s} = 1;
1024     1;
1025     } else {
1026     $Cache{ncname}->{$s} = 0;
1027     0;
1028     }
1029     }
1030    
1031     # 1/0 = $x->_check_namespace_prefix ($s)
1032     sub _check_namespace_prefix ($$) {
1033     my $self = shift;
1034     my $s = shift;
1035     return 0 unless defined $s;
1036     return 1 if $s eq '';
1037     substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':';
1038     $self->_check_ncname ($s);
1039     }
1040    
1041 wakaba 1.2 ## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have
1042     ## multiple parents.
1043     ## TODO: normalize namespace URI (removing non URI chars)
1044    
1045     sub flag ($$;$) {
1046     my ($self, $name, $value) = @_;
1047     if (defined $value) {
1048     $self->{flag}->{$name} = $value;
1049     }
1050     $self->{flag}->{$name};
1051     }
1052    
1053 wakaba 1.3 sub option ($$;$) {
1054     my ($self, $name, $value) = @_;
1055     if (defined $value) {
1056     $self->{option}->{$name} = $value;
1057     }
1058     $self->{option}->{$name};
1059     }
1060    
1061 wakaba 1.1 =back
1062    
1063 wakaba 1.2 =head1 NODE TYPES
1064    
1065     =over 4
1066    
1067     =item #attribute
1068    
1069     Attribute. Its XML representation takes the form of NAME="VALUE".
1070    
1071     =item #comment
1072    
1073     Comment declarement. <!-- -->
1074    
1075     =item #declarement
1076    
1077     SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc.
1078     <!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value">
1079    
1080     =item #element
1081    
1082     Element. Its XML representation consists of start tag, content and end tag,
1083     like <TYPE>content</TYPE>.
1084    
1085     =item #fragment
1086    
1087     Fragment of nodes. It's similar to DOM's fragment node.
1088    
1089     =item #pi
1090    
1091     Prosessing instruction. <?NAME VALUE?>
1092    
1093     =item #reference
1094    
1095     Character reference or general or parameter entity reference.
1096     &#nnnn;, &#xhhhh;, &name;, %name;.
1097    
1098     =item #section
1099    
1100     Markup section. CDATA, INCLUDE and IGNORE are supported by XML.
1101     <![%type;[...]]>
1102    
1103     =item #text
1104    
1105     Text.
1106    
1107     =item #xml
1108    
1109     Preformatted XML text.
1110    
1111     =cut
1112    
1113 wakaba 1.4 =head1 RESTRICTIONS
1114    
1115     =over 4
1116    
1117     =item XML without XML Namespace is not supported.
1118    
1119     =item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared.
1120    
1121     For example, next code generates invalid (non-well-formed) XML Namespace document.
1122    
1123     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1124     print $x # <ns1:elementType xmlns:ns1=""></ns1:elementType>
1125    
1126     So you must write like:
1127    
1128     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1129     $x->define_new_namespace ('' => '');
1130     print $x; # <elementType xmlns=""></elementType>
1131    
1132     =back
1133    
1134 wakaba 1.1 =head1 LICENSE
1135    
1136     Copyright 2003 Wakaba <w@suika.fam.cx>
1137    
1138     This program is free software; you can redistribute it and/or
1139     modify it under the same terms as Perl itself.
1140    
1141     =cut
1142    
1143 wakaba 1.5 1; # $Date: 2003/05/24 04:52:19 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24