=head1 NAME SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator =head1 DESCRIPTION This module can be used to generate the document fragment of XML, SGML or other well-formed (in XML meaning) data formats with the object oriented manner. This module cannot be used to parse XML (or other marked-up) document (or its fragment) by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose of this module is to make it easy for tiny perl scripts to GENERATE well-formed markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-)) =cut package SuikaWiki::Markup::XML; use strict; our $VERSION = do{my @r=(q$Revision: 1.8 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; use overload '""' => \&stringify, fallback => 1; use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!; our %Namespace_URI_to_prefix = ( 'DAV:' => [qw/dav webdav/], 'http://greenbytes.de/2002/rfcedit' => [qw/ed/], 'http://icl.com/saxon' => [qw/saxon/], 'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/], 'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/], 'http://purl.org/rss/1.0/' => ['', qw/rss rss10/], 'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/], 'http://www.mozilla.org/xbl' => ['', qw/xbl/], 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/], 'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/], 'http://www.w3.org/1999/xlink' => [qw/l xlink/], 'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/], 'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/], 'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/], 'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/], 'http://www.w3.org/2000/svg' => ['', qw/s svg/], 'http://www.w3.org/2002/06/hlink' => [qw/h hlink/], 'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/], 'http://www.w3.org/2002/07/owl' => [qw/owl/], 'http://www.w3.org/2002/xforms/cr' => [qw/f xforms/], 'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/], 'http://www.wapforum.org/2001/wml' => [qw/wap/], 'http://xml.apache.org/xalan' => [qw/xalan/], 'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/], 'urn:schemas-microsoft-com:vml' => [qw/v vml/], 'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/], 'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/], 'urn:x-suika-fam-cx:markup:ietf:rfc:2629' => ['', qw/rfc rfc2629/], ); my %Cache; my %NS = ( SGML => 'urn:x-suika-fam-cx:markup:sgml:', xml => 'http://www.w3.org/XML/1998/namespace', xmlns => 'http://www.w3.org/2000/xmlns/', ); =head1 METHODS =over 4 =item $x = SuikaWiki::Markup::XML->new (%options) Returns new instance of the module. It is itself a node. Available options: C, C, C (default: C<#element>), C, C and C. =cut sub new ($;%) { my $class = shift; my $self = bless {@_}, $class; $self->{type} ||= '#element'; if ($self->{qname}) { ($self->{namespace_prefix}, $self->{local_name}) = $self->_ns_parse_qname ($self->{qname}); $self->{_qname} = $self->{qname}; } if (defined $self->{namespace_prefix}) { $self->{namespace_prefix} .= ':' if $self->{namespace_prefix} && substr ($self->{namespace_prefix}, -1) ne ':'; $self->{ns}->{$self->{namespace_prefix}||''} = $self->{namespace_uri} if defined $self->{namespace_uri}; } for (qw/local_name value/) { if (ref ($self->{$_}) && eval q($self->{$_}->_CLASS_NAME eq $self->_CLASS_NAME)) { $self->{$_}->{parent} = $self; } } $self->{node} ||= []; $self; } sub _ns_parse_qname ($$) { shift; my $qname = shift; if ($qname =~ /:/) { return split /:/, $qname, 2; } else { return (undef, $qname); } } =item $x->append_node ($node) Appending given node to the object (as the last child). If the type of given node is C<#fragment>, its all children, not the node itself, are appended. This method returns the appended node unless the type of given node is C<#fragment>. In such cases, this node (C<$x>) is returned. Available options: C. =cut sub append_node ($$;%) { my $self = shift; my ($new_node, %o) = @_; unless (ref $new_node) { if ($o{node_or_text}) { return $self->append_text ($new_node); } else { die "append_node: Invalid node" unless ref $new_node; } } if ($new_node->{type} eq '#fragment') { for (@{$new_node->{node}}) { push @{$self->{node}}, $_; $_->{parent} = $self; } $self; } else { push @{$self->{node}}, $new_node; $new_node->{parent} = $self; $new_node; } } =item $new_node = $x->append_new_node (%options) Appending a new node. The new node is returned. Available options: C, C, C, C. =cut sub append_new_node ($;%) { my $self = shift; my %o = @_; my $new_node = __PACKAGE__->new (%o); push @{$self->{node}}, $new_node; $new_node->{parent} = $self; $new_node; } =item $new_node = $x->append_text ($text) Appending given text as a new text node. The new text node is returned. =cut sub append_text ($$;%) { my $self = shift; my $s = shift; #if (@{$self->{node}}[-1]->{type} eq '#text') { # $self->{node}}[-1]->append_new_node (type => '#text', value => $s); #} else { $self->append_new_node (type => '#text', value => $s); #} } sub append_baretext ($$;%) { my $self = shift; my $s = shift; $self->append_new_node (type => '#xml', value => $s); } =item $attr_node = $x->get_attribute ($local_name, %options) Returns the attribute node whose local-name is C<$local_name>. Available options: C, C. =cut sub get_attribute ($$;%) { my $self = shift; my ($name, %o) = @_; for (@{$self->{node}}) { if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) { return $_; } } ## Node is not exist if ($o{make_new_node}) { return $self->append_new_node (type => '#attribute', local_name => $name, namespace_uri => $o{namespace_uri}); } else { return undef; } } =item $attr_node = $x->set_attribute ($local_name => $value, %options) Set the value of the attribute. The attribute node is returned. Available options: C. =cut sub set_attribute ($$$;%) { my $self = shift; my ($name, $val, %o) = @_; if (ref ($val) eq 'ARRAY' || ref ($val) eq 'HASH' || ref ($val) eq 'CODE') { ## TODO: common error handling require Carp; Carp::croak "set_attribute: new attribute value must be string or blessed object"; } for (@{$self->{node}}) { if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) { $_->{value} = $val; $_->{node} = []; return $_; } } return $self->append_new_node (type => '#attribute', local_name => $name, value => $val, namespace_uri => $o{namespace_uri}); } =item \@children = $x->child_nodes Returns an array reference to child nodes. =item $local_name = $x->local_name ([$new_name]) Returns or set the local-name. =item $uri = $x->namespace_uri ([$new_uri]) Returns or set namespace name (URI) of the element or the attribute =item $uri = $x->namespace_prefix ([$new_prefix]) Returns or set namespace prefix of the element or the attribute. You may give C<$new_prefix> in form either 'foo' or 'foo:'. To indicate "default" prefix, use '' (length == 0 string). =item $type = $x->node_type Returns the node type. =item $node = $x->parent_node Returns the parent node. If there is no parent node, undef is returned. =cut sub child_nodes ($) { shift->{node} } sub local_name ($;$) { my ($self, $newname) = @_; if ($newname) { $self->{local_name} = $newname; } if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') { $self->{local_name}->{local_name}; } else { $self->{local_name} } } sub node_type ($) { shift->{type} } sub parent_node ($) { shift->{parent} } ## TODO: obsolete sub target_name ($;$) { my ($self, $new) = @_; if (defined $new) { $self->{target_name} = $new; } $self->{target_name}; } sub namespace_uri ($;$) { my ($self, $new_uri) = @_; if (defined $new_uri) { $self->{namespace_uri} = $new_uri; } $self->{namespace_uri}; } sub namespace_prefix ($;$) { my ($self, $new_pfx) = @_; if (defined $new_pfx && $self->{namespace_uri}) { $new_pfx .= ':' if $new_pfx; $self->{namespace_prefix} = $new_pfx; $self->{ns}->{$new_pfx} = $self->{namespace_uri}; } $self->_get_namespace_prefix ($self->{namespace_uri}); } =item $i = $x->count Returns the number of child nodes. =cut # TODO: support counting by type sub count ($;@) { my $self = shift; (defined $self->{value} ? 1 : 0) + scalar @{$self->{node}}; } # $prefix = $x->_get_namespace_prefix ($namespace_uri) sub _get_namespace_prefix ($$;%) { my ($self, $uri) = (shift, shift); my %o = @_; if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) { return $p if $self->_prefix_to_uri ($p) eq $uri; } if ($Namespace_URI_to_prefix{$uri}) { for (@{$Namespace_URI_to_prefix{$uri}}) { my $pfx = $_; $pfx .= ':' if $pfx; if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) { return $self->_uri_to_prefix ($uri => $pfx, %o); } } } else { my ($u_r_i, $pfx) = ($uri); $u_r_i =~ s/[^0-9A-Za-z._-]+/ /g; my @u_r_i = split / /, $u_r_i; for (reverse @u_r_i) { if (s/([A-Za-z][0-9A-Za-z._-]+)//) { my $p_f_x = $1 . ':'; next if lc (substr ($p_f_x, 0, 3)) eq 'xml'; unless ($self->_prefix_to_uri ($p_f_x)) { $pfx = $p_f_x; last; } } } if ($pfx) { return $self->_uri_to_prefix ($uri => $pfx, %o); } else { while (1) { my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':'; unless ($self->_prefix_to_uri ($pfx)) { return $self->_uri_to_prefix ($uri => $pfx, %o); } } } } } sub _set_prefix_to_uri ($$$;%) { my ($self, $prefix => $uri, %o) = @_; return undef unless $self->_check_namespace_prefix ($prefix); $self->{ns}->{$prefix} = $uri; $self->_prefix_to_uri ($prefix); } ## TODO: removing ns declare (1.1) support # $uri or undef = $x->_prefix_to_uri ($prefix) sub _prefix_to_uri ($$;$%) { my ($self, $prefix, %o) = @_; return undef unless $self->_check_namespace_prefix ($prefix); if (uc (substr $prefix, 0, 3) eq 'XML') { return $NS{xml} if $prefix eq 'xml:'; return $NS{xmlns} if $prefix eq 'xmlns:'; } if (defined $self->{ns}->{$prefix}) { $self->{ns}->{$prefix}; } elsif (ref $self->{parent}) { shift; # $self $self->{parent}->_prefix_to_uri (@_); } else { undef; } } # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options) # use_no_prefix (default: 1): Allow default namespace (no prefix). sub _uri_to_prefix ($$;$%) { my ($self, $uri, $new_prefix, %o) = @_; if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) { $self->{ns}->{$new_prefix} = $uri; $new_prefix; } else { return 'xml:' if $uri eq $NS{xml}; return 'xmlns:' if $uri eq $NS{xmlns}; for (keys %{$self->{ns}||{}}) { next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix}); return $_ if $self->{ns}->{$_} eq $uri; } if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') { shift; # $self $self->{parent}->_uri_to_prefix (@_); } else { undef; } } } =item $x->define_new_namespace ($prefix => $uri) Defines a new XML Namespace. This method is useful for root or section-level element node. Returned value is unspecified in this version of this module. =cut ## TODO: structured URI (such as http://&server;/) support sub define_new_namespace ($$$) { my ($self, $prefix, $uri) = @_; if ($prefix eq '' || $self->_check_ncname ($prefix)) { $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':'; $self->_set_prefix_to_uri ($prefix => $uri); } else { undef; } } =item $uri = $x->defined_namespace_prefix ($prefix) Query whether the namespace prefix is defined or not. If defined, return namespace name (URI). =cut sub defined_namespace_prefix ($$) { my ($self, $prefix) = @_; $prefix .= ':' if $prefix;#Carp::carp join ",", ';',@_; $self->_prefix_to_uri ($prefix); } =item $qname = $x->qname Returns QName ((namespace-)qualified name) of the element type. Undef is retuened when the type does not have its QName (ie. when type is neither C<#element> or C<#attribute>). =cut sub qname ($) { my $self = shift; if ($self->_check_ncname ($self->{local_name})) { if ($self->{type} eq '#element') { $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name} unless $self->{_qname}; return $self->{_qname}; } elsif ($self->{type} eq '#attribute') { return $self->attribute_name; } } undef; } sub remove_references ($) { my $self = shift; my @node; for (@{$self->{node}}) { $_->remove_references; } for (@{$self->{node}}) { if ($_->{type} ne '#reference' || ($self->{type} eq '#declaration' && $_->{namespace_uri} eq $NS{SGML}.'entity')) { push @node, $_; } else { if ($_->{namespace_uri} =~ /char/) { my $e = ref ($_)->new (type => '#text', value => chr $_->{value}); $e->{parent} = $self; push @node, $e; } elsif ($_->{flag}->{smxp__ref_expanded}) { for my $e (@{$_->{node}}) { if ($e->{type} ne '#attribute') { $e->{parent} = $self; push @node, $e; } } } else { ## reference is not expanded push @node, $_; } } $_->{flag}->{smxp__defined_with_param_ref} = 0 if $_->{flag}->{smxp__defined_with_param_ref}; } $self->{node} = \@node; } sub resolve_relative_uri ($;$%) { require URI; my ($self, $rel, %o) = @_; my $base = $self->get_attribute ('base', namespace_uri => $NS{xml}); $base = ref ($base) ? $base->inner_text : undef; if ($base !~ /^(?:[0-9A-Za-z.+-]|%[0-9A-Fa-f]{2})+:/) { # $base is relative $base = $self->_resolve_relative_uri_by_parent ($base, \%o); } eval q{ ## Catch error such as $base is 'data:,foo' (non hierarchic scheme,...) return URI->new ($rel)->abs ($base || '.'); ## BUG (or spec) of URI: $base == false } or return $rel; } sub _resolve_relative_uri_by_parent ($$$) { my ($self, $rel, $o) = @_; if (ref $self->{parent}) { if (!$o->{use_references_base_uri} && $self->{parent}->{type} eq '#reference') { ## This case is necessary to work with ## ## text ## &ent; ## ## entity's text ## ## return $self->{parent}->_resolve_relative_uri_by_parent ($rel, $o); } else { return $self->{parent}->resolve_relative_uri ($rel, %$o); } } else { return $rel; } } sub base_uri ($;$) { my ($self, $new_uri) = @_; my $base; if (defined $new_uri) { $base = $self->set_attribute (base => $new_uri, namespace_uri => $NS{xml}); } $base ||= $self->get_attribute ('base', namespace_uri => $NS{xml}); ref ($base) ? $base->inner_text : undef; } =item $tag = $x->start_tag Returns the start tag (or something that marks the start of something, such as '' for C<#comment> nodes). =cut sub end_tag ($) { my $self = shift; if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) { 'qname . '>'; } elsif ($self->{type} eq '#comment') { '-->'; } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) { '?>'; } elsif ($self->{type} eq '#reference') { ';'; } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) { '>'; } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) { '>'; } elsif ($self->{type} eq '#section') { if (ref $self->{local_name} && $self->{local_name}->{type} eq '#reference') { ']]>'; } elsif ($self->_check_ncname ($self->{local_name})) { ']]>'; } else { # error ''; } } else { ''; } } =item $tag = $x->attribute_name Returns the attribute name. =cut sub attribute_name ($) { my $self = shift; if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { ($self->{namespace_uri} ? (ref $self->{parent} ? $self->{parent} : $self) ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '') .$self->{local_name}; } else { ''; } } =item $tag = $x->attribute_value Returns the attribute value. =cut sub attribute_value ($;%) { my $self = shift; my %o = @_; if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { my $r = '"'; if (ref ($self->{value}) && eval q($self->{value}->_CLASS_NAME eq $self->_CLASS_NAME)) { unshift @{$self->{node}}, $self->{value}; undef $self->{value}; } else { $r .= $self->_entitize ($self->{value}); } for (@{$self->{node}}) { my $nt = $_->node_type; if ($nt eq '#reference' || $nt eq '#xml') { $r .= $_->outer_xml; } elsif ($nt ne '#attribute') { $r .= $self->_entitize ($_->inner_text, percent => $o{escape_percent}); } } return $r . '"'; } else { ''; } } sub entity_value ($;%) { my $self = shift; my %o = @_; my $_entitize = sub { my $s = shift; $s =~ s/&/&/g; $s =~ s/&(\p{InXML_NameStartChar}\p{InXMLNameChar}*);/&$1;/g; $s =~ s/%/%/g; $s =~ s/"/"/g; $s; }; if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { my $r = '"' . &$_entitize ($self->{value}); for (@{$self->{node}}) { my $nt = $_->node_type; if ($nt eq '#reference' || $nt eq '#xml') { $r .= $_->outer_xml; } elsif ($nt ne '#attribute') { $r .= &$_entitize ($_->inner_text); } } return $r . '"'; } else { ''; } } ## This method should be called only from SuikaWiki::Markup::XML::* family modules, ## since this is NOT a FORMAL interface. sub _entity_parameter_literal_value ($;%) { my $self = shift; my $r = $self->{value}; for (@{$self->{node}}) { my $nt = $_->node_type; #if ($nt eq '#reference' || $nt eq '#xml') { if ($nt eq '#xml') { $r .= $_->outer_xml; } elsif ($nt ne '#attribute') { $r .= $_->inner_text; } } return $r; } =item $tag = $x->attribute Returns the attribute (name and value pair). =cut sub attribute ($) { my $self = shift; if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { $self->attribute_name . '=' . $self->attribute_value; } else { ''; } } sub external_id ($;%) { my $self = shift; my %o = @_; my ($pubid, $sysid, $ndata); for (@{$self->{node}}) { if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) { if ($_->{local_name} eq 'PUBLIC') { $pubid = $_->inner_text; } elsif ($_->{local_name} eq 'SYSTEM') { $sysid = $_->inner_text; } elsif ($_->{local_name} eq 'NDATA') { $ndata = $_->inner_text; undef $ndata unless $self->_check_ncname ($ndata); } } } my $r = ''; if (length $pubid) { $pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges; } if (length $sysid) { if ($sysid =~ /"/) { if ($sysid =~ /'/) { $sysid =~ s/"/"/; $sysid = '"' . $sysid . '"'; } else { $sysid = "'" . $sysid . "'"; } } else { $sysid = '"' . $sysid . '"'; } } if ($pubid && $sysid) { $r = 'PUBLIC "' . $pubid . '" ' . $sysid; } elsif ($sysid) { $r = 'SYSTEM ' . $sysid; } elsif ($pubid && $o{allow_pubid_only}) { $r = 'PUBLIC "' . $pubid . '"'; } if ($r && $ndata && $o{use_ndata}) { $r .= ' NDATA ' . $ndata; } $r; } =item $s = $x->content_spec Generates contentspec of element type declaration (ex. C<(E1 | E2 | E3)>) or AttDef of attribute declaration (ex. C). =cut sub content_spec ($) { my $self = shift; if ($self->{type} eq '#element') { my $text = 0; my $contentspec = join ' | ', map {$_->qname} grep {$text = 1 if $_->{type} eq '#text'; $_->{type} eq '#element'} @{$self->{node}}; $contentspec = '#PCDATA' . ($contentspec ? ' | ' . $contentspec : '') if $text; return $contentspec ? '(' . $contentspec . ')' : 'EMPTY'; } elsif ($self->{type} eq '#attribute') { my $attdef = $self->qname . "\t" . ($self->{data_type} || 'CDATA') . "\t"; my $default = $self->{default_decl}; $default .= ' ' . $self->attribute_value if $default eq '#FIXED'; unless ($default) { $default = defined $self->{value} ? $self->attribute_value : '#IMPLIED'; } return $attdef . $default; } } =item $tag = $x->inner_xml Returns the content of the node in XML syntax. (In case of the C<#element> nodes, element content without start- and end-tags is returned.) Note that for not all node types the behavior of this method is defined. For example, returned value of C<#attribute> might be unexpected one in this version of this module. =cut sub inner_xml ($;%) { my $self = shift; my %o = @_; my $r = ''; if ($self->{type} eq '#comment') { $r = $self->inner_text; $r =~ s/--/-E/g; } elsif ($self->{type} eq '#pi') { if (length $self->{value}) { $r = ' ' . $self->{value}; $r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i. } for (@{$self->{node}}) { if ($_->node_type eq '#attribute') { $r .= ' ' . $_->attribute; } else { my $s = $_->inner_text; $s =~ s/\?>/?>/g; $r .= ' ' . $s if length $s; } } } elsif ($self->{type} eq '#reference') { if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') { $r = sprintf '%02X', $self->{value}; } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') { $r = sprintf '%02d', $self->{value}; } elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') { $r = $self->{local_name}->{local_name}; } elsif ($self->_check_ncname ($self->{local_name})) { $r = ($self->{local_name}); } else { # error $r = ''; } } elsif ($self->{type} eq '#declaration') { if ($self->{namespace_uri} eq $NS{SGML}.'doctype') { my $root = $self->get_attribute ('qname'); ref $root ? ($root = $root->inner_text) : (ref $self->{parent} ? (do { for (@{$self->{parent}->{node}}) { if ($_->{type} eq '#element') { $root = $_->qname; last if $root; } } $root = '#IMPLIED'; }) : ($root = '#IMPLIED')); ## error! my ($isub, $xid) = ('', $self->external_id); for (@{$self->{node}}) { $isub .= $_->outer_xml if $_->{type} ne '#attribute'; } if ($xid) { $r = $xid; if ($isub) { $r .= " [" . $isub . "]"; } } else { if ($isub) { $r = "[" . $isub . "]"; } else { $r = "[]"; } } $r = $root . ' ' . $r; } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity' || $self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter' || $self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:notation') { my %xid_opt; $r = $self->{local_name} . ' ' if !$self->{flag}->{smxp__defined_with_param_ref} && $self->_check_ncname ($self->{local_name}); if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') { #$r = '% ' . $r; } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity') { $xid_opt{use_ndata} = 1; } elsif ($self->{namespace_uri} eq $NS{SGML}.'notation') { $xid_opt{allow_pubid_only} = 1; } my ($v, $xid) = ($self->{value}, $self->external_id (%xid_opt)); undef $xid if $self->{flag}->{smxp__defined_with_param_ref}; if ($xid) { ## External ID $r .= $xid; } else { ## EntityValue my $entity_value = $self->get_attribute ('value'); undef $entity_value if $self->{flag}->{smxp__defined_with_param_ref}; if ($entity_value) { # $r .= $entity_value->entity_value; } else { ## Parameter entity reference $r .= $self->{value}; for (@{$self->{node}}) { $r .= $_->outer_xml unless $_->{type} eq '#attribute'; } } } } elsif ($self->{namespace_uri} eq $NS{SGML}.'element') { my $qname = $self->get_attribute ('qname'); ref $qname ? $qname = $qname->inner_text : undef; if ($qname && $self->_check_name ($qname)) { ## Element type name is defined $r = $qname . ' '; if ($o{output_tag_omit_declaration}) { $r .= ($self->{flag}->{element_tag_start_omitable} ? 'o ' : '- ') . ($self->{flag}->{element_tag_end_omitable} ? 'o ' : '- '); } my $rs = $self->inner_text (output_ref_as_is => 1); $r .= $rs || 'ANY'; } else { #if (ref $self->{node}->[0] && $self->{node}->[0]->{type} eq '#element') { ### Element prototype is given # $r = $self->{node}->[0]->qname . ' ' . &$to . $self->{node}->[0]->content_spec; #} elsif () { ### Element type name and contentspec is given # $r = $self->{target_name} . ' ' . &$to . ($self->inner_text || 'ANY'); #} else { ## (Element type name and contetnspac) is given $r .= $self->inner_text (output_ref_as_is => 1); # || 'Name ANY'; # error #} } ## TODO: reform } elsif ($self->{local_name} eq 'ATTLIST') { if ($self->_check_name ($self->{target_name})) { $r = $self->{target_name}; } my $t = $self->inner_text (output_ref_as_is => 1); $r .= "\n\t" . $t if $t; $r ||= 'Name'; # error! for (@{$self->{node}}) { if ($_->{type} eq '#attribute') { $r .= "\n\t" . $_->content_spec; } } } else { # unknown for (@{$self->{node}}) { $r .= $_->outer_xml; } } } elsif ($self->{type} eq '#section' && !ref $self->{local_name} && $self->{local_name} eq 'CDATA') { $r = $self->inner_text; $r =~ s/]]>/]]>]]/g; } else { if ($self->{type} ne '#xml') { $r = $self->_entitize ($self->{value}); } else { $r = $self->{value}; } for (@{$self->{node}}) { my $nt = $_->node_type; if (($self->{option}->{indent}) && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) { $r .= "\n"; } $r .= $_->outer_xml unless $_->node_type eq '#attribute'; } } $r; } =item $tag = $x->outer_xml Returns the node in XML syntax. =cut sub outer_xml ($) { my $self = shift; if ($self->{type} eq '#attribute') { $self->attribute; } else { if ($self->{option}->{indent} && $self->{type} eq '#element') { my $r = $self->start_tag; my $c = $self->inner_xml; if (!length $c && $self->{option}->{use_EmptyElemTag}) { substr ($r, -1) = ' />'; } else { if ($c) { $c =~ s/\n/\n /g; $r .= "\n " . $c . "\n"; } $r .= $self->end_tag; } $r; } else { my $r = $self->start_tag; my $c = $self->inner_xml; if ($self->{type} eq '#element' && !length ($c) && $self->{option}->{use_EmptyElemTag}) { substr ($r, -1) = ' />'; } else { $r .= $c . $self->end_tag; #$r .= "\n" if $self->{type} eq '#declaration'; } $r; #'{'.$self->{type}.': '.$r.'}'; # for debug } } } =item $tag = $x->inner_text Returns the text content of the node. (In many case the returned value is same as WinIE DOM C function's or XPath C function's. But some classes that inherits this module might implement to return other value (eg. to return the value of the alt attribute of html:img element). Available options: C. =cut sub inner_text ($;%) { my $self = shift; my %o = @_; my $r = ''; if ($self->{type} eq '#reference' && #) { #if ( ($self->{namespace_uri} eq $NS{SGML}.'char:ref' || $self->{namespace_uri} eq $NS{SGML}.'char:ref:hex')) { $r = chr $self->{value}; #} else { # general entity ref. or parameter entity ref. # ## TODO: how implement? is this ok? # my $em = $self->_get_entity_manager; # $r = $em->get_entity_value ($self->{local_name}, namespace_uri => $self->{namespace_uri}); #} } elsif ($self->{type} eq '#declaration' && ($self->{namespace_uri} eq $NS{SGML}.'entity' || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter')) { ## TODO: $r = $self->get_attribute ('value')->inner_text; } else { # not #reference nor #declaration(ENTITY) if (ref ($self->{value}) && eval q($self->{value}->_CLASS_NAME eq $self->_CLASS_NAME)) { unshift @{$self->{node}}, $self->{value}; undef $self->{value}; } else { $r = $self->{value}; } if ($o{output_ref_as_is}) { ## output as if RCDATA $r =~ s/&/&/g; for my $node (@{$self->{node}}) { my $nt = $node->node_type; if ($nt eq '#reference' || $nt eq '#xml') { $r .= $node->outer_xml; } elsif ($nt ne '#attribute') { $r .= map {s/&/&/g; $_} $node->inner_text; } } } else { for (@{$self->{node}}) { $r .= $_->inner_text unless $_->node_type eq '#attribute'; } } } $r; } {no warnings; # prototype mismatch *stringify = \&outer_xml; } sub _get_entity_manager ($) { my $self = shift; if ($self->{type} eq '#document') { unless ($self->{flag}->{smx__entity_manager}) { require SuikaWiki::Markup::XML::EntityManager; $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self); } return $self->{flag}->{smx__entity_manager}; } elsif (ref $self->{parent}) { return $self->{parent}->_get_entity_manager; } else { unless ($self->{flag}->{smx__entity_manager}) { require SuikaWiki::Markup::XML::EntityManager; $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self); } return $self->{flag}->{smx__entity_manager}; } } sub _CLASS_NAME ($) { 'SuikaWiki::Markup::XML'; } # $s = $x->_entitize ($s) sub _entitize ($$;%) { my ($self, $s, %o) = (shift, shift, @_); $s =~ s/&/&/g; $s =~ s//>/g; $s =~ s/"/"/g; $s =~ s/%/%/g if $o{percent}; $s =~ s/'/'/g if $o{apos}; $s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F])/sprintf '&#%d;', ord $1/ge; $s; } # 1/0 = $x->_check_name ($s) sub _check_name ($$) { my $self = shift; my $s = shift; return $Cache{name}->{$s} if defined $Cache{name}->{$s}; if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) { # \p{...}('*'/'+'/'{n,}') does not work... $Cache{name}->{$s} = 1; 1; } else { $Cache{name}->{$s} = 0; 0; } } # 1/0 = $x->_check_ncname ($s) sub _check_ncname ($$) { my $self = shift; my $s = shift; return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s}; if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) { # \p{...}('*'/'+'/'{n,}') does not work... $Cache{ncname}->{$s} = 1; 1; } else { $Cache{ncname}->{$s} = 0; 0; } } # 1/0 = $x->_check_namespace_prefix ($s) sub _check_namespace_prefix ($$) { my $self = shift; my $s = shift; return 0 unless defined $s; return 1 if $s eq ''; substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':'; $self->_check_ncname ($s); } ## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have ## multiple parents. ## TODO: normalize namespace URI (removing non URI chars) sub flag ($$;$) { my ($self, $name, $value) = @_; if (defined $value) { $self->{flag}->{$name} = $value; } $self->{flag}->{$name}; } sub option ($$;$) { my ($self, $name, $value) = @_; if (defined $value) { $self->{option}->{$name} = $value; } $self->{option}->{$name}; } =back =head1 NODE TYPES =over 4 =item #attribute Attribute. Its XML representation takes the form of NAME="VALUE". =item #comment Comment declarement. =item #declarement SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc. , , =item #element Element. Its XML representation consists of start tag, content and end tag, like content. =item #fragment Fragment of nodes. It's similar to DOM's fragment node. =item #pi Prosessing instruction. =item #reference Character reference or general or parameter entity reference. &#nnnn;, &#xhhhh;, &name;, %name;. =item #section Markup section. CDATA, INCLUDE and IGNORE are supported by XML. =item #text Text. =item #xml Preformatted XML text. =cut =head1 RESTRICTIONS =over 4 =item XML without XML Namespace is not supported. =item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared. For example, next code generates invalid (non-well-formed) XML Namespace document. my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType'); print $x # So you must write like: my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType'); $x->define_new_namespace ('' => ''); print $x; # =back =head1 LICENSE Copyright 2003 Wakaba This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # $Date: 2003/06/27 13:07:44 $