/[pub]/suikawiki/script/lib/SuikaWiki/Markup/XML.pm
Suika

Contents of /suikawiki/script/lib/SuikaWiki/Markup/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (hide annotations) (download)
Sat May 31 07:01:46 2003 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.5: +4 -2 lines
XMLize

1 wakaba 1.1
2     =head1 NAME
3    
4     SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator
5    
6     =head1 DESCRIPTION
7    
8     This module can be used to generate the document fragment of XML, SGML or
9     other well-formed (in XML meaning) data formats with the object oriented manner.
10    
11     This module cannot be used to parse XML (or other marked-up) document (or its fragment)
12     by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose
13     of this module is to make it easy for tiny perl scripts to GENERATE well-formed
14     markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-))
15    
16     =cut
17    
18     package SuikaWiki::Markup::XML;
19     use strict;
20 wakaba 1.6 our $VERSION = do{my @r=(q$Revision: 1.5 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
21 wakaba 1.2 use overload '""' => \&stringify,
22     fallback => 1;
23     use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!;
24 wakaba 1.1 our %Namespace_URI_to_prefix = (
25     'DAV:' => [qw/dav webdav/],
26     'http://greenbytes.de/2002/rfcedit' => [qw/ed/],
27     'http://icl.com/saxon' => [qw/saxon/],
28     'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/],
29     'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/],
30     'http://purl.org/rss/1.0/' => ['', qw/rss rss10/],
31     'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/],
32     'http://www.mozilla.org/xbl' => ['', qw/xbl/],
33 wakaba 1.3 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/],
34 wakaba 1.1 'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/],
35     'http://www.w3.org/1999/xlink' => [qw/l xlink/],
36     'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/],
37     'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/],
38     'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/],
39     'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/],
40     'http://www.w3.org/2000/svg' => ['', qw/s svg/],
41     'http://www.w3.org/2002/06/hlink' => [qw/h hlink/],
42     'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/],
43 wakaba 1.3 'http://www.w3.org/2002/07/owl' => [qw/owl/],
44 wakaba 1.6 'http://www.w3.org/2002/xforms/cr' => [qw/f xforms/],
45 wakaba 1.1 'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/],
46 wakaba 1.3 'http://www.wapforum.org/2001/wml' => [qw/wap/],
47 wakaba 1.1 'http://xml.apache.org/xalan' => [qw/xalan/],
48     'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/],
49     'urn:schemas-microsoft-com:vml' => [qw/v vml/],
50     'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/],
51     'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/],
52     );
53     my %Cache;
54 wakaba 1.4 my $NS_SGML = 'urn:x-suika-fam-cx:markup:sgml:';
55 wakaba 1.1
56     =head1 METHODS
57    
58     =over 4
59    
60     =item $x = SuikaWiki::Markup::XML->new (%options)
61    
62     Returns new instance of the module. It is itself a node.
63    
64 wakaba 1.4 Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri> and C<value>.
65 wakaba 1.1
66     =cut
67    
68     sub new ($;%) {
69     my $class = shift;
70     my $self = bless {@_}, $class;
71     $self->{type} ||= '#element';
72 wakaba 1.5 if ($self->{qname}) {
73     ($self->{namespace_prefix}, $self->{local_name}) = $self->_ns_parse_qname ($self->{qname});
74     $self->{_qname} = $self->{qname};
75     }
76     if (defined $self->{namespace_prefix}) {
77     $self->{namespace_prefix} .= ':' if $self->{namespace_prefix}
78     && substr ($self->{namespace_prefix}, -1) ne ':';
79     $self->{ns}->{$self->{namespace_prefix}||''} = $self->{namespace_uri} if defined $self->{namespace_uri};
80     }
81 wakaba 1.4 for (qw/local_name value/) {
82 wakaba 1.2 if (ref $self->{$_}) {
83     $self->{$_}->{parent} = $self;
84     }
85     }
86 wakaba 1.1 $self->{node} ||= [];
87     $self;
88     }
89    
90 wakaba 1.5 sub _ns_parse_qname ($$) {
91     shift;
92     my $qname = shift;
93     if ($qname =~ /:/) {
94     return split /:/, $qname, 2;
95     } else {
96     return (undef, $qname);
97     }
98     }
99 wakaba 1.1
100     =item $x->append_node ($node)
101    
102     Appending given node to the object (as the last child).
103     If the type of given node is C<#fragment>, its all children, not the node
104     itself, are appended.
105    
106     This method returns the appended node unless the type of given node is C<#fragment>.
107     In such cases, this node (C<$x>) is returned.
108    
109 wakaba 1.2 Available options: C<node_or_text>.
110    
111 wakaba 1.1 =cut
112    
113 wakaba 1.2 sub append_node ($$;%) {
114 wakaba 1.1 my $self = shift;
115 wakaba 1.2 my ($new_node, %o) = @_;
116     unless (ref $new_node) {
117     if ($o{node_or_text}) {
118     return $self->append_text ($new_node);
119     } else {
120     die "append_node: Invalid node" unless ref $new_node;
121     }
122     }
123 wakaba 1.1 if ($new_node->{type} eq '#fragment') {
124     for (@{$new_node->{node}}) {
125     push @{$self->{node}}, $_;
126     $_->{parent} = $self;
127     }
128     $self;
129     } else {
130     push @{$self->{node}}, $new_node;
131     $new_node->{parent} = $self;
132     $new_node;
133     }
134     }
135    
136     =item $new_node = $x->append_new_node (%options)
137    
138     Appending a new node. The new node is returned.
139    
140     Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>.
141    
142     =cut
143    
144     sub append_new_node ($;%) {
145     my $self = shift;
146     my %o = @_;
147     my $new_node = __PACKAGE__->new (%o);
148     push @{$self->{node}}, $new_node;
149     $new_node->{parent} = $self;
150     $new_node;
151     }
152    
153     =item $new_node = $x->append_text ($text)
154    
155     Appending given text as a new text node. The new text node is returned.
156    
157     =cut
158    
159     sub append_text ($$;%) {
160     my $self = shift;
161     my $s = shift;
162     #if (@{$self->{node}}[-1]->{type} eq '#text') {
163     # $self->{node}}[-1]->append_new_node (type => '#text', value => $s);
164     #} else {
165     $self->append_new_node (type => '#text', value => $s);
166     #}
167     }
168    
169 wakaba 1.2 sub append_baretext ($$;%) {
170     my $self = shift;
171     my $s = shift;
172     $self->append_new_node (type => '#xml', value => $s);
173     }
174    
175 wakaba 1.1 =item $attr_node = $x->get_attribute ($local_name, %options)
176    
177     Returns the attribute node whose local-name is C<$local_name>.
178    
179     Available options: C<namespace_uri>, C<make_new_node>.
180    
181     =cut
182    
183     sub get_attribute ($$;%) {
184     my $self = shift;
185     my ($name, %o) = @_;
186     for (@{$self->{node}}) {
187     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
188     return $_;
189     }
190     }
191     ## Node is not exist
192     if ($o{make_new_node}) {
193     return $self->append_new_node (type => '#attribute', local_name => $name, namespace_uri => $o{namespace_uri});
194     } else {
195     return undef;
196     }
197     }
198    
199     =item $attr_node = $x->set_attribute ($local_name => $value, %options)
200    
201     Set the value of the attribute. The attribute node is returned.
202    
203     Available options: C<namespace_uri>.
204    
205     =cut
206    
207     sub set_attribute ($$$;%) {
208     my $self = shift;
209     my ($name, $val, %o) = @_;
210     for (@{$self->{node}}) {
211     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
212     $_->{value} = $val;
213     $_->{node} = [];
214     return $_;
215     }
216     }
217     return $self->append_new_node (type => '#attribute', local_name => $name, value => $val, namespace_uri => $o{namespace_uri});
218     }
219    
220     =item \@children = $x->child_nodes
221    
222     Returns an array reference to child nodes.
223    
224 wakaba 1.4 =item $local_name = $x->local_name ([$new_name])
225 wakaba 1.1
226 wakaba 1.4 Returns or set the local-name.
227    
228     =item $uri = $x->namespace_uri ([$new_uri])
229    
230     Returns or set namespace name (URI) of the element or the attribute
231 wakaba 1.1
232 wakaba 1.5 =item $uri = $x->namespace_prefix ([$new_prefix])
233    
234     Returns or set namespace prefix of the element or the attribute.
235     You may give C<$new_prefix> in form either 'foo' or 'foo:'.
236     To indicate "default" prefix, use '' (length == 0 string).
237    
238 wakaba 1.1 =item $type = $x->node_type
239    
240     Returns the node type.
241    
242     =item $node = $x->parent_node
243    
244     Returns the parent node. If there is no parent node, undef is returned.
245    
246     =cut
247    
248     sub child_nodes ($) { shift->{node} }
249 wakaba 1.4 sub local_name ($;$) {
250     my ($self, $newname) = @_;
251     if ($newname) {
252     $self->{local_name} = $newname;
253     }
254     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
255     $self->{local_name}->{local_name};
256     } else {
257     $self->{local_name}
258     }
259     }
260 wakaba 1.1 sub node_type ($) { shift->{type} }
261     sub parent_node ($) { shift->{parent} }
262    
263 wakaba 1.4 ## TODO: obsolete
264     sub target_name ($;$) {
265     my ($self, $new) = @_;
266     if (defined $new) {
267     $self->{target_name} = $new;
268     }
269     $self->{target_name};
270     }
271    
272     sub namespace_uri ($;$) {
273     my ($self, $new_uri) = @_;
274     if (defined $new_uri) {
275     $self->{namespace_uri} = $new_uri;
276     }
277     $self->{namespace_uri};
278     }
279 wakaba 1.5 sub namespace_prefix ($;$) {
280     my ($self, $new_pfx) = @_;
281     if (defined $new_pfx && $self->{namespace_uri}) {
282     $new_pfx .= ':' if $new_pfx;
283     $self->{namespace_prefix} = $new_pfx;
284     $self->{ns}->{$new_pfx} = $self->{namespace_uri};
285     }
286     $self->_get_namespace_prefix ($self->{namespace_uri});
287     }
288 wakaba 1.4
289 wakaba 1.1 =item $i = $x->count
290    
291     Returns the number of child nodes.
292    
293     =cut
294    
295     # TODO: support counting by type
296     sub count ($;@) {
297 wakaba 1.2 my $self = shift;
298     (defined $self->{value} ? 1 : 0) + scalar @{$self->{node}};
299 wakaba 1.1 }
300    
301     # $prefix = $x->_get_namespace_prefix ($namespace_uri)
302     sub _get_namespace_prefix ($$;%) {
303     my ($self, $uri) = (shift, shift);
304     my %o = @_;
305     if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) {
306     return $p if $self->_prefix_to_uri ($p) eq $uri;
307     } if ($Namespace_URI_to_prefix{$uri}) {
308     for (@{$Namespace_URI_to_prefix{$uri}}) {
309     my $pfx = $_; $pfx .= ':' if $pfx;
310     if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) {
311     return $self->_uri_to_prefix ($uri => $pfx, %o);
312     }
313     }
314     } else {
315     my ($u_r_i, $pfx) = ($uri);
316 wakaba 1.4 $u_r_i =~ s/[^0-9A-Za-z._-]+/ /g;
317     my @u_r_i = split / /, $u_r_i;
318     for (reverse @u_r_i) {
319     if (s/([A-Za-z][0-9A-Za-z._-]+)//) {
320     my $p_f_x = $1 . ':';
321     next if lc (substr ($p_f_x, 0, 3)) eq 'xml';
322     unless ($self->_prefix_to_uri ($p_f_x)) {
323     $pfx = $p_f_x;
324     last;
325     }
326 wakaba 1.1 }
327     }
328     if ($pfx) {
329     return $self->_uri_to_prefix ($uri => $pfx, %o);
330     } else {
331     while (1) {
332     my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':';
333     unless ($self->_prefix_to_uri ($pfx)) {
334     return $self->_uri_to_prefix ($uri => $pfx, %o);
335     }
336     }
337     }
338     }
339     }
340    
341 wakaba 1.4 sub _set_prefix_to_uri ($$$;%) {
342     my ($self, $prefix => $uri, %o) = @_;
343     return undef unless $self->_check_namespace_prefix ($prefix);
344     $self->{ns}->{$prefix} = $uri;
345     $self->_prefix_to_uri ($prefix);
346     }
347    
348     ## TODO: removing ns declare (1.1) support
349     # $uri or undef = $x->_prefix_to_uri ($prefix)
350 wakaba 1.1 sub _prefix_to_uri ($$;$%) {
351 wakaba 1.4 my ($self, $prefix, %o) = @_;
352 wakaba 1.1 return undef unless $self->_check_namespace_prefix ($prefix);
353     if (uc (substr $prefix, 0, 3) eq 'XML') {
354     return 'http://www.w3.org/XML/1998/namespace' if $prefix eq 'xml:';
355     return 'http://www.w3.org/2000/xmlns/' if $prefix eq 'xmlns:';
356 wakaba 1.2 }
357     if (defined $self->{ns}->{$prefix}) {
358 wakaba 1.1 $self->{ns}->{$prefix};
359     } elsif (ref $self->{parent}) {
360     shift; # $self
361     $self->{parent}->_prefix_to_uri (@_);
362     } else {
363     undef;
364     }
365     }
366    
367     # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options)
368     # use_no_prefix (default: 1): Allow default namespace (no prefix).
369     sub _uri_to_prefix ($$;$%) {
370     my ($self, $uri, $new_prefix, %o) = @_;
371     if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) {
372     $self->{ns}->{$new_prefix} = $uri;
373     $new_prefix;
374     } else {
375     return 'xml:' if $uri eq 'http://www.w3.org/XML/1998/namespace';
376     return 'xmlns:' if $uri eq 'http://www.w3.org/2000/xmlns/';
377     for (keys %{$self->{ns}||{}}) {
378     next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix});
379     return $_ if $self->{ns}->{$_} eq $uri;
380     }
381 wakaba 1.2 if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') {
382 wakaba 1.1 shift; # $self
383     $self->{parent}->_uri_to_prefix (@_);
384     } else {
385     undef;
386     }
387     }
388     }
389    
390     =item $x->define_new_namespace ($prefix => $uri)
391    
392     Defines a new XML Namespace. This method is useful for root or section-level
393     element node.
394    
395     Returned value is unspecified in this version of this module.
396    
397     =cut
398    
399 wakaba 1.4 ## TODO: structured URI (such as http://&server;/) support
400 wakaba 1.1 sub define_new_namespace ($$$) {
401     my ($self, $prefix, $uri) = @_;
402     if ($prefix eq '' || $self->_check_ncname ($prefix)) {
403     $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':';
404 wakaba 1.4 $self->_set_prefix_to_uri ($prefix => $uri);
405 wakaba 1.1 } else {
406     undef;
407     }
408     }
409    
410 wakaba 1.4 =item $uri = $x->defined_namespace_prefix ($prefix)
411    
412     Query whether the namespace prefix is defined or not.
413     If defined, return namespace name (URI).
414    
415     =cut
416    
417     sub defined_namespace_prefix ($$) {
418     my ($self, $prefix) = @_;
419     $prefix .= ':' if $prefix;#Carp::carp join ",", ';',@_;
420     $self->_prefix_to_uri ($prefix);
421     }
422    
423 wakaba 1.1 =item $qname = $x->qname
424    
425     Returns QName ((namespace-)qualified name) of the element type.
426     Undef is retuened when the type does not have its QName
427     (ie. when type is neither C<#element> or C<#attribute>).
428    
429     =cut
430    
431     sub qname ($) {
432     my $self = shift;
433     if ($self->_check_ncname ($self->{local_name})) {
434     if ($self->{type} eq '#element') {
435     $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name}
436     unless $self->{_qname};
437     return $self->{_qname};
438     } elsif ($self->{type} eq '#attribute') {
439     return $self->attribute_name;
440     }
441     }
442     undef;
443     }
444    
445     =item $tag = $x->start_tag
446    
447 wakaba 1.4 Returns the start tag (or something that marks the start of something, such as '<!--'
448 wakaba 1.1 for C<#comment> nodes).
449    
450     =cut
451    
452     sub start_tag ($) {
453     my $self = shift;
454     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
455     my $r = '<';
456     $r .= $self->qname;
457     for (@{$self->{node}}) {
458     $r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute';
459     }
460     for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) {
461     if ($prefix) {
462     $r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1);
463     } else {
464     $r .= ' xmlns';
465     }
466     $r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"';
467     }
468     $r .= '>';
469     $r;
470     } elsif ($self->{type} eq '#comment') {
471 wakaba 1.4 '<!--';
472     } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
473     '<?' . ($self->{local_name});
474 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
475     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') {
476     '&#x';
477     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') {
478     '&#';
479 wakaba 1.4 } elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
480     if ($self->{local_name}->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
481 wakaba 1.2 '%';
482     } else {
483     '&';
484     }
485 wakaba 1.4 } elsif ($self->_check_ncname ($self->{local_name})) {
486 wakaba 1.2 if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
487     '%';
488     } else {
489     '&';
490     }
491     } else { # error
492     '';
493     }
494 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
495     '<!' . {
496     $NS_SGML.'attlist' => 'ATTLIST',
497     $NS_SGML.'doctype' => 'DOCTYPE',
498     $NS_SGML.'element' => 'ELEMENT',
499     $NS_SGML.'entity' => 'ENTITY',
500     $NS_SGML.'entity:parameter' => 'ENTITY %',
501     $NS_SGML.'notation' => 'NOTATION',
502     $NS_SGML.'sgml' => 'SGML',
503     }->{$self->{namespace_uri}} . ' ';
504 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
505     my $r = '<!' . $self->{local_name} . ' ';
506     if ($self->{local_name} eq 'DOCTYPE' && ref $self->{parent}) {
507     my $qname;
508     for (@{$self->{parent}->{node}}) {
509     if ($_->{type} eq '#element') {
510     $qname = $_->qname;
511     last if $qname;
512     }
513     }
514     $r .= ($qname ? $qname : '#IMPLIED') . ' ';
515 wakaba 1.2 }
516     $r;
517     } elsif ($self->{type} eq '#section') {
518     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#reference') {
519     '<![' . $self->{local_name} . '[';
520     } elsif ($self->_check_ncname ($self->{local_name})) {
521     '<![' . $self->{local_name} . '[';
522     } else { # error
523     '';
524 wakaba 1.1 }
525     } else {
526     '';
527     }
528     }
529    
530     =item $tag = $x->end_tag
531    
532 wakaba 1.4 Returns the end tag (or something that marks the end of something, such as '-->'
533 wakaba 1.1 for C<#comment> nodes).
534    
535     =cut
536    
537     sub end_tag ($) {
538     my $self = shift;
539     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
540     '</' . $self->qname . '>';
541     } elsif ($self->{type} eq '#comment') {
542 wakaba 1.4 '-->';
543 wakaba 1.1 } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
544     '?>';
545 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
546     ';';
547 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
548     '>';
549 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
550 wakaba 1.4 '>';
551 wakaba 1.2 } elsif ($self->{type} eq '#section') {
552     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#reference') {
553     ']]>';
554     } elsif ($self->_check_ncname ($self->{local_name})) {
555     ']]>';
556     } else { # error
557     '';
558     }
559 wakaba 1.1 } else {
560     '';
561     }
562     }
563    
564     =item $tag = $x->attribute_name
565    
566     Returns the attribute name.
567    
568     =cut
569    
570     sub attribute_name ($) {
571     my $self = shift;
572     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
573     ($self->{namespace_uri} ?
574     (ref $self->{parent} ? $self->{parent} : $self)
575     ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '')
576     .$self->{local_name};
577     } else {
578     '';
579     }
580     }
581    
582     =item $tag = $x->attribute_value
583    
584     Returns the attribute value.
585    
586     =cut
587    
588 wakaba 1.4 sub attribute_value ($;%) {
589 wakaba 1.1 my $self = shift;
590 wakaba 1.4 my %o = @_;
591 wakaba 1.1 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
592 wakaba 1.5 my $r = '"';
593     if (ref ($self->{value}) && $self->{value}->_CLASS_NAME eq $self->_CLASS_NAME) {
594     unshift @{$self->{node}}, $self->{value};
595     undef $self->{value};
596     } else {
597     $r .= $self->_entitize ($self->{value});
598     }
599 wakaba 1.4 for (@{$self->{node}}) {
600     my $nt = $_->node_type;
601     if ($nt eq '#reference' || $nt eq '#xml') {
602     $r .= $_->outer_xml;
603     } elsif ($nt ne '#attribute') {
604     $r .= $self->_entitize ($_->inner_text, percent => $o{escape_percent});
605     }
606     }
607     return $r . '"';
608     } else {
609     '';
610     }
611     }
612    
613     sub entity_value ($;%) {
614     my $self = shift;
615     my %o = @_;
616     my $_entitize = sub {
617     my $s = shift;
618     $s =~ s/&/&amp;/g;
619     $s =~ s/%/&#x25;/g;
620     $s =~ s/"/&quot;/g;
621     $s;
622     };
623     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
624     my $r = '"' . &$_entitize ($self->{value});
625     for (@{$self->{node}}) {
626     my $nt = $_->node_type;
627     if ($nt eq '#reference' || $nt eq '#xml') {
628     $r .= $_->outer_xml;
629     } elsif ($nt ne '#attribute') {
630     $r .= &$_entitize ($_->inner_text);
631 wakaba 1.1 }
632     }
633 wakaba 1.4 return $r . '"';
634 wakaba 1.1 } else {
635     '';
636     }
637     }
638    
639     =item $tag = $x->attribute
640    
641     Returns the attribute (name and value pair).
642    
643     =cut
644    
645     sub attribute ($) {
646     my $self = shift;
647     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
648     $self->attribute_name . '=' . $self->attribute_value;
649     } else {
650     '';
651     }
652     }
653    
654 wakaba 1.2 sub external_id ($;%) {
655     my $self = shift;
656     my %o = @_;
657     my ($pubid, $sysid, $ndata);
658     for (@{$self->{node}}) {
659 wakaba 1.4 if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) {
660 wakaba 1.2 if ($_->{local_name} eq 'PUBLIC') {
661 wakaba 1.4 $pubid = $_->inner_text;
662 wakaba 1.2 } elsif ($_->{local_name} eq 'SYSTEM') {
663 wakaba 1.4 $sysid = $_->inner_text;
664 wakaba 1.2 } elsif ($_->{local_name} eq 'NDATA') {
665 wakaba 1.4 $ndata = $_->inner_text;
666     undef $ndata unless $self->_check_ncname ($ndata);
667 wakaba 1.2 }
668     }
669     }
670     my $r = '';
671 wakaba 1.4 if (length $pubid) {
672     $pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges;
673     }
674     if (length $sysid) {
675     if ($sysid =~ /"/) {
676     if ($sysid =~ /'/) {
677     $sysid =~ s/"/&quot;/; $sysid = '"' . $sysid . '"';
678     } else {
679     $sysid = "'" . $sysid . "'";
680     }
681     } else {
682     $sysid = '"' . $sysid . '"';
683     }
684     }
685 wakaba 1.2 if ($pubid && $sysid) {
686 wakaba 1.4 $r = 'PUBLIC "' . $pubid . '" ' . $sysid;
687 wakaba 1.2 } elsif ($sysid) {
688 wakaba 1.4 $r = 'SYSTEM ' . $sysid;
689 wakaba 1.2 } elsif ($pubid && $o{allow_pubid_only}) {
690 wakaba 1.4 $r = 'PUBLIC "' . $pubid . '"';
691 wakaba 1.2 }
692     if ($r && $ndata && $o{use_ndata}) {
693 wakaba 1.4 $r .= ' NDATA ' . $ndata;
694 wakaba 1.2 }
695     $r;
696     }
697    
698     =item $s = $x->content_spec
699    
700     Generates contentspec of element type declaration (ex. C<(E1 | E2 | E3)>)
701     or AttDef of attribute declaration (ex. C<name CDATA #REQUIRED>).
702    
703     =cut
704    
705     sub content_spec ($) {
706     my $self = shift;
707     if ($self->{type} eq '#element') {
708     my $text = 0;
709     my $contentspec = join ' | ', map {$_->qname} grep {$text = 1 if $_->{type} eq '#text'; $_->{type} eq '#element'} @{$self->{node}};
710     $contentspec = '#PCDATA' . ($contentspec ? ' | ' . $contentspec : '') if $text;
711    
712     return $contentspec ? '(' . $contentspec . ')' : 'EMPTY';
713     } elsif ($self->{type} eq '#attribute') {
714     my $attdef = $self->qname . "\t" . ($self->{data_type} || 'CDATA') . "\t";
715     my $default = $self->{default_decl};
716     $default .= ' ' . $self->attribute_value if $default eq '#FIXED';
717     unless ($default) {
718     $default = defined $self->{value} ? $self->attribute_value : '#IMPLIED';
719     }
720     return $attdef . $default;
721     }
722     }
723    
724 wakaba 1.1 =item $tag = $x->inner_xml
725    
726     Returns the content of the node in XML syntax. (In case of the C<#element> nodes,
727     element content without start- and end-tags is returned.)
728    
729     Note that for not all node types the behavior of this method is defined.
730     For example, returned value of C<#attribute> might be unexpected one
731     in this version of this module.
732    
733     =cut
734    
735 wakaba 1.3 sub inner_xml ($;%) {
736 wakaba 1.1 my $self = shift;
737 wakaba 1.3 my %o = @_;
738 wakaba 1.1 my $r = '';
739     if ($self->{type} eq '#comment') {
740     $r = $self->inner_text;
741     $r =~ s/--/-&#x45;/g;
742     } elsif ($self->{type} eq '#pi') {
743     if (length $self->{value}) {
744     $r = ' ' . $self->{value};
745 wakaba 1.2 $r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i.
746 wakaba 1.1 }
747     for (@{$self->{node}}) {
748     if ($_->node_type eq '#attribute') {
749     $r .= ' ' . $_->attribute;
750     } else {
751     my $s = $_->inner_text;
752     $s =~ s/\?>/?&gt;/g;
753     $r .= ' ' . $s if length $s;
754     }
755     }
756 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
757     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') {
758     $r = sprintf '%02X', $self->{value};
759     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') {
760     $r = sprintf '%02d', $self->{value};
761 wakaba 1.4 } elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
762     $r = $self->{local_name}->{local_name};
763     } elsif ($self->_check_ncname ($self->{local_name})) {
764     $r = ($self->{local_name});
765 wakaba 1.2 } else { # error
766     $r = '';
767     }
768 wakaba 1.1 } elsif ($self->{type} eq '#declaration') {
769 wakaba 1.2 if ($self->{local_name} eq 'DOCTYPE') {
770     my ($isub, $xid) = ('', $self->external_id);
771     for (@{$self->{node}}) {
772 wakaba 1.4 $isub .= $_->outer_xml if $_->{type} ne '#attribute';
773 wakaba 1.2 }
774     if ($xid) {
775     $r = $xid;
776     if ($isub) {
777     $r .= " [\n" . $isub . "]";
778 wakaba 1.1 }
779     } else {
780 wakaba 1.2 if ($isub) {
781     $r = "[\n" . $isub . "]";
782     } else {
783     $r = "[]";
784     }
785 wakaba 1.1 }
786 wakaba 1.4 } elsif ($self->{namespace_uri} eq $NS_SGML.'doctype') {
787     my $root = $self->get_attribute ('qname');
788     ref $root ? ($root = $root->inner_text) : (ref $self->{parent} ? (do {
789     for (@{$self->{parent}->{node}}) {
790     if ($_->{type} eq '#element') {
791     $root = $_->qname;
792     last if $root;
793     }
794     }
795     $root = '#IMPLIED';
796     }) : ($root = '#IMPLIED')); ## error!
797     my ($isub, $xid) = ('', $self->external_id);
798     for (@{$self->{node}}) {
799     $isub .= $_->outer_xml if $_->{type} ne '#attribute';
800 wakaba 1.2 }
801     if ($xid) {
802 wakaba 1.4 $r = $xid;
803     if ($isub) {
804     $r .= " [\n" . $isub . "]";
805     }
806     } else {
807     if ($isub) {
808     $r = "[\n" . $isub . "]";
809     } else {
810     $r = "[]";
811     }
812     }
813     $r = $root . ' ' . $r;
814     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity'
815     || $self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter'
816     || $self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:notation') {
817     my %xid_opt;
818     $r = $self->{local_name} . ' ' if $self->_check_ncname ($self->{local_name});
819     if ($self->{namespace_uri} eq $NS_SGML.'entity:parameter') {
820     #$r = '% ' . $r;
821     } elsif ($self->{namespace_uri} eq $NS_SGML.'entity') {
822     $xid_opt{use_ndata} = 1;
823     } elsif ($self->{namespace_uri} eq $NS_SGML.'notation') {
824     $xid_opt{allow_pubid_only} = 1;
825     }
826    
827     my ($v, $xid) = ($self->{value}, $self->external_id (%xid_opt));
828     if ($xid) { ## External ID
829 wakaba 1.2 $r .= $xid;
830 wakaba 1.4 } else { ## EntityValue
831     my $entity_value = $self->get_attribute ('value');
832     if ($entity_value) { # <!ENTITY foo "bar">
833     $r .= $entity_value->entity_value;
834     } else { ## Parameter entity reference
835     $r .= $self->{value};
836     for (@{$self->{node}}) {
837     $r .= $_->outer_xml;
838     }
839 wakaba 1.2 }
840 wakaba 1.1 }
841 wakaba 1.4 } elsif ($self->{namespace_uri} eq $NS_SGML.'element') {
842     my $qname = $self->get_attribute ('qname');
843     ref $qname ? $qname = $qname->inner_text : undef;
844     if ($qname && $self->_check_name ($qname)) {
845     ## Element type name is defined
846     $r = $qname . ' ';
847 wakaba 1.3 if ($o{output_tag_omit_declaration}) {
848 wakaba 1.4 $r .= ($self->{flag}->{element_tag_start_omitable} ? 'o ' : '- ')
849     .
850     ($self->{flag}->{element_tag_end_omitable} ? 'o ' : '- ');
851 wakaba 1.3 }
852 wakaba 1.4 my $rs = $self->inner_text (output_ref_as_is => 1);
853     $r .= $rs || 'ANY';
854 wakaba 1.2 } else {
855 wakaba 1.4 #if (ref $self->{node}->[0] && $self->{node}->[0]->{type} eq '#element') {
856     ### Element prototype is given
857     # $r = $self->{node}->[0]->qname . ' ' . &$to . $self->{node}->[0]->content_spec;
858     #} elsif () {
859     ### Element type name and contentspec is given
860     # $r = $self->{target_name} . ' ' . &$to . ($self->inner_text || 'ANY');
861     #} else {
862 wakaba 1.2 ## (Element type name and contetnspac) is given
863 wakaba 1.4 $r .= $self->inner_text (output_ref_as_is => 1);
864     # || 'Name ANY'; # error
865     #}
866 wakaba 1.2 }
867     } elsif ($self->{local_name} eq 'ATTLIST') {
868     if ($self->_check_name ($self->{target_name})) {
869     $r = $self->{target_name};
870     }
871     my $t = $self->inner_text (output_ref_as_is => 1);
872     $r .= "\n\t" . $t if $t;
873     $r ||= 'Name'; # error!
874     for (@{$self->{node}}) {
875     if ($_->{type} eq '#attribute') {
876     $r .= "\n\t" . $_->content_spec;
877     }
878 wakaba 1.1 }
879 wakaba 1.2 } else { # unknown
880 wakaba 1.3 for (@{$self->{node}}) {
881     $r .= $_->outer_xml;
882     }
883 wakaba 1.1 }
884 wakaba 1.2 } elsif ($self->{type} eq '#section' && !ref $self->{local_name} && $self->{local_name} eq 'CDATA') {
885     $r = $self->inner_text;
886     $r =~ s/]]>/]]>]]<![CDATA[>/g;
887 wakaba 1.1 } else {
888     if ($self->{type} ne '#xml') {
889     $r = $self->_entitize ($self->{value});
890     } else {
891     $r = $self->{value};
892     }
893     for (@{$self->{node}}) {
894     my $nt = $_->node_type;
895 wakaba 1.4 if (($self->{option}->{indent}) && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) {
896 wakaba 1.1 $r .= "\n";
897     }
898     $r .= $_->outer_xml unless $_->node_type eq '#attribute';
899     }
900     }
901     $r;
902     }
903    
904    
905     =item $tag = $x->outer_xml
906    
907     Returns the node in XML syntax.
908    
909     =cut
910    
911     sub outer_xml ($) {
912     my $self = shift;
913     if ($self->{type} eq '#attribute') {
914     $self->attribute;
915     } else {
916 wakaba 1.4 if ($self->{option}->{indent} && $self->{type} eq '#element') {
917 wakaba 1.1 my $r = $self->start_tag;
918     my $c = $self->inner_xml;
919 wakaba 1.4 if (!length $c && $self->{option}->{use_EmptyElemTag}) {
920     substr ($r, -1) = ' />';
921     } else {
922     if ($c) {
923     $c =~ s/\n/\n /g;
924     $r .= "\n " . $c . "\n";
925     }
926     $r .= $self->end_tag;
927 wakaba 1.1 }
928     $r;
929     } else {
930 wakaba 1.4 my $r = $self->start_tag;
931     my $c = $self->inner_xml;
932     if ($self->{type} eq '#element' && !length $c && $self->{option}->{use_EmptyElemTag}) {
933     substr ($r, -1) = ' />';
934     } else {
935     $r .= $c . $self->end_tag;
936     #$r .= "\n" if $self->{type} eq '#declaration';
937     }
938 wakaba 1.2 $r;
939     #'{'.$self->{type}.': '.$r.'}'; # for debug
940 wakaba 1.1 }
941     }
942     }
943    
944     =item $tag = $x->inner_text
945    
946     Returns the text content of the node. (In many case the returned value is same
947     as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's.
948     But some classes that inherits this module might implement to return other
949     value (eg. to return the value of the alt attribute of html:img element).
950    
951 wakaba 1.2 Available options: C<output_ref_as_is>.
952    
953 wakaba 1.1 =cut
954    
955 wakaba 1.2 sub inner_text ($;%) {
956 wakaba 1.1 my $self = shift;
957 wakaba 1.2 my %o = @_;
958 wakaba 1.5 my $r = '';
959     if (ref ($self->{value}) && $self->{value}->_CLASS_NAME eq $self->_CLASS_NAME) {
960     unshift @{$self->{node}}, $self->{value};
961     undef $self->{value};
962     } else {
963     $r = $self->{value};
964     }
965 wakaba 1.2 if ($o{output_ref_as_is}) { ## output as if RCDATA
966 wakaba 1.4 $r =~ s/&/&amp;/g;
967     for my $node (@{$self->{node}}) {
968     my $nt = $node->node_type;
969     if ($nt eq '#reference' || $nt eq '#xml') {
970     $r .= $node->outer_xml;
971 wakaba 1.2 } elsif ($nt ne '#attribute') {
972 wakaba 1.4 $r .= map {s/&/&amp;/g; $_} $node->inner_text;
973 wakaba 1.2 }
974     }
975     } else {
976     for (@{$self->{node}}) {
977     $r .= $_->inner_text unless $_->node_type eq '#attribute';
978     }
979 wakaba 1.1 }
980     $r;
981     }
982    
983     {no warnings; # prototype mismatch
984     *stringify = \&outer_xml;
985     }
986    
987 wakaba 1.5 sub _CLASS_NAME ($) {
988     'SuikaWiki::Markup::XML';
989     }
990    
991 wakaba 1.1 # $s = $x->_entitize ($s)
992 wakaba 1.4 sub _entitize ($$;%) {
993     my ($self, $s, %o) = (shift, shift, @_);
994 wakaba 1.1 $s =~ s/&/&amp;/g;
995     $s =~ s/</&lt;/g;
996     $s =~ s/>/&gt;/g;
997     $s =~ s/"/&quot;/g;
998 wakaba 1.4 $s =~ s/%/&#x25;/g if $o{percent};
999     $s =~ s/'/&#x27;/g if $o{apos};
1000 wakaba 1.6 $s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F])/sprintf '&amp;#%d;', ord $1/g;
1001 wakaba 1.1 $s;
1002     }
1003    
1004 wakaba 1.2 # 1/0 = $x->_check_name ($s)
1005     sub _check_name ($$) {
1006     my $self = shift;
1007     my $s = shift;
1008     return $Cache{name}->{$s} if defined $Cache{name}->{$s};
1009     if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) {
1010     # \p{...}('*'/'+'/'{n,}') does not work...
1011     $Cache{name}->{$s} = 1;
1012     1;
1013     } else {
1014     $Cache{name}->{$s} = 0;
1015     0;
1016     }
1017     }
1018 wakaba 1.1 # 1/0 = $x->_check_ncname ($s)
1019     sub _check_ncname ($$) {
1020     my $self = shift;
1021     my $s = shift;
1022     return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s};
1023     if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) {
1024     # \p{...}('*'/'+'/'{n,}') does not work...
1025     $Cache{ncname}->{$s} = 1;
1026     1;
1027     } else {
1028     $Cache{ncname}->{$s} = 0;
1029     0;
1030     }
1031     }
1032    
1033     # 1/0 = $x->_check_namespace_prefix ($s)
1034     sub _check_namespace_prefix ($$) {
1035     my $self = shift;
1036     my $s = shift;
1037     return 0 unless defined $s;
1038     return 1 if $s eq '';
1039     substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':';
1040     $self->_check_ncname ($s);
1041     }
1042    
1043 wakaba 1.2 ## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have
1044     ## multiple parents.
1045     ## TODO: normalize namespace URI (removing non URI chars)
1046    
1047     sub flag ($$;$) {
1048     my ($self, $name, $value) = @_;
1049     if (defined $value) {
1050     $self->{flag}->{$name} = $value;
1051     }
1052     $self->{flag}->{$name};
1053     }
1054    
1055 wakaba 1.3 sub option ($$;$) {
1056     my ($self, $name, $value) = @_;
1057     if (defined $value) {
1058     $self->{option}->{$name} = $value;
1059     }
1060     $self->{option}->{$name};
1061     }
1062    
1063 wakaba 1.1 =back
1064    
1065 wakaba 1.2 =head1 NODE TYPES
1066    
1067     =over 4
1068    
1069     =item #attribute
1070    
1071     Attribute. Its XML representation takes the form of NAME="VALUE".
1072    
1073     =item #comment
1074    
1075     Comment declarement. <!-- -->
1076    
1077     =item #declarement
1078    
1079     SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc.
1080     <!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value">
1081    
1082     =item #element
1083    
1084     Element. Its XML representation consists of start tag, content and end tag,
1085     like <TYPE>content</TYPE>.
1086    
1087     =item #fragment
1088    
1089     Fragment of nodes. It's similar to DOM's fragment node.
1090    
1091     =item #pi
1092    
1093     Prosessing instruction. <?NAME VALUE?>
1094    
1095     =item #reference
1096    
1097     Character reference or general or parameter entity reference.
1098     &#nnnn;, &#xhhhh;, &name;, %name;.
1099    
1100     =item #section
1101    
1102     Markup section. CDATA, INCLUDE and IGNORE are supported by XML.
1103     <![%type;[...]]>
1104    
1105     =item #text
1106    
1107     Text.
1108    
1109     =item #xml
1110    
1111     Preformatted XML text.
1112    
1113     =cut
1114    
1115 wakaba 1.4 =head1 RESTRICTIONS
1116    
1117     =over 4
1118    
1119     =item XML without XML Namespace is not supported.
1120    
1121     =item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared.
1122    
1123     For example, next code generates invalid (non-well-formed) XML Namespace document.
1124    
1125     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1126     print $x # <ns1:elementType xmlns:ns1=""></ns1:elementType>
1127    
1128     So you must write like:
1129    
1130     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1131     $x->define_new_namespace ('' => '');
1132     print $x; # <elementType xmlns=""></elementType>
1133    
1134     =back
1135    
1136 wakaba 1.1 =head1 LICENSE
1137    
1138     Copyright 2003 Wakaba <w@suika.fam.cx>
1139    
1140     This program is free software; you can redistribute it and/or
1141     modify it under the same terms as Perl itself.
1142    
1143     =cut
1144    
1145 wakaba 1.6 1; # $Date: 2003/05/25 10:55:14 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24