#!/usr/local/bin/perl use utf8; ## This file is written in UTF-8 use strict; require 'mkpm.pl'; use vars qw(%PROP %SET %SET_ALIAS); $PROP{module_name} = 'XML'; $PROP{version} = do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; $PROP{author_name} = 'Wakaba'; $PROP{author_mail} = 'w@suika.fam.cx'; $PROP{pod_description} = <. First Edition, 1998-02-10, . Second Edition, 2000-10-06, . "XML 1.0 Specification Errata", . The errata list of XML 1.0 First Edition. "XML 1.0 Second Edition Specification Errata", . The errata list of XML 1.0 Second Edition. "Namespaces in XML", W3C Recommendation, . First Edition, 1999-01-14, . "Namespaces in XML Errata", . "Unicode in XML and other Markup Languages", Unicode Technical Report #20, W3C Note, . This version of this module refers 2003-06-13 version of the W3C Note . EOH $SET{Char} = < defined in XML 1.0 spec (#2, ) !0009 !000A !000D !0020 D7FF !E000 FFFD !10000 10FFFF EOH $SET{S} = < defined in XML 1.0 spec (#3, ) !0009 !000A !000D !0020 EOH $SET{BaseChar} = qq(#DESCRIPTION C defined in XML 1.0 spec (#85, ) ).xml_ebnf_to_charlist (< defined in XML 1.0 spec (#86, ) ).xml_ebnf_to_charlist (q([#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029])); $SET{CombiningChar} = qq(#DESCRIPTION C defined in XML 1.0 spec (#87, ) ).xml_ebnf_to_charlist (< defined in XML 1.0 spec (#88, ) ).xml_ebnf_to_charlist (< defined in XML 1.0 spec (#89, ) ).xml_ebnf_to_charlist (< (::= C / C) defined in XML 1.0 spec (#84, ) ); for (split /\n/, $SET{BaseChar} . $SET{Ideographic}) { $SET{Letter} .= $_ . "\n" unless /^\#/; } $SET{NameChar} = < defined in XML 1.0 spec (#4, ) .-_: EOH $SET{_NameStartChar} = < defined in XML 1.0 spec (#5, ) _: EOH $SET{NCNameChar} = < defined in Namespace in XML spec (#5, ) .-_ EOH $SET{_NCNameStartChar} = < defined in Namespace in XML spec (#4, ) _ EOH for (split /\n/, $SET{Letter}) { $SET{_NameStartChar} .= $_ . "\n" unless /^\#/; $SET{NameChar} .= $_ . "\n" unless /^\#/; $SET{_NCNameStartChar} .= $_ . 
"\n" unless /^\#/;
    $SET{NCNameChar} .= $_ . "\n" unless /^\#/;
}

## Digit, CombiningChar and Extender entries are appended to the NameChar
## and NCNameChar sets only; the _NameStartChar / _NCNameStartChar sets are
## not touched here.
for my $line (split /\n/, $SET{Digit} . $SET{CombiningChar} . $SET{Extender}) {
    next if $line =~ /^\#/;    # "#..." lines (e.g. #DESCRIPTION) are not copied
    $SET{NameChar}   .= $line . "\n";
    $SET{NCNameChar} .= $line . "\n";
}

## Charlist notation as far as this file shows it (the authoritative parser
## lives in mkpm.pl): a line starting with "#" is a directive, "!HHHH" is a
## single code point, "!HHHH HHHH" is a range.  Other lines appear to list
## literal characters -- confirm against mkpm.pl.

$SET{PubidChar} = <<'EOH';
#DESCRIPTION C defined in XML 1.0 spec (#13, )
!000A
!000D
!0020
0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
-'()+,./:=?;!*#@$_%
EOH

$SET{VersionNum} = <<'EOH';
#DESCRIPTION Characters are elements of C defined in XML 1.0 spec (#26, )
0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
-_.:
EOH

## NOTE(review): the Unicode noncharacters in the BMP are U+FDD0..U+FDEF, so
## the "!FD00 FD0F" entry below does not look like a noncharacter range.
## Confirm the intended range against erratum E46 before relying on it.
$SET{_deprecated_noncharacter} = <<'EOH';
#DESCRIPTION Additional deprecated characters in XML 1.0 SE errata (E46, ) ("noncharacter" in Unicode)
!007F 0084
!0086 009F
!FD00 FD0F
!1FFFE 1FFFF
!2FFFE 2FFFF
!3FFFE 3FFFF
!4FFFE 4FFFF
!5FFFE 5FFFF
!6FFFE 6FFFF
!7FFFE 7FFFF
!8FFFE 8FFFF
!9FFFE 9FFFF
!AFFFE AFFFF
!BFFFE BFFFF
!CFFFE CFFFF
!DFFFE DFFFF
!EFFFE EFFFF
!FFFFE FFFFF
!10FFFE 10FFFF
EOH

$SET{_unicode_xml_not_suitable} = <<'EOH';
#DESCRIPTION Characters not suitable for use with markup (Table 3.1 of )
!2028 202E
!206A 206F
!FEFF
!FFF9 FFFC
!1D173 1D17A
!E0000 E007F
EOH

$SET{_unicode_xml_suitable_format_character} = <<'EOH';
#DESCRIPTION Some characters that affect text format but are suitable for use with markup (Table 4.1 of )
!00A0
!00AD
!0363
!0600 0603
!06DD
!070C
!0F0C
!180B 180E
!200C 200F
!2011
!202F
!2044
!2060 2063
!2FF0 2FFB
!303E
!FE00 FE0F
!E0100 E01DF
EOH

## xml_ebnf_to_charlist ($ebnf)
##
## Converts a character production written in the EBNF notation of the XML
## specification into the charlist notation stored in %SET.
##
## $ebnf is a string of alternatives separated by "|".  An alternative
## is either a range "[#xHHHH-#xHHHH]" or a single code point "#xHHHH".
##
## Returns a string holding one upper-cased line per recognized alternative:
## "!HHHH HHHH \n" for a range, "!HHHH\n" for a single code point.  An
## undefined or empty argument yields the empty string.
##
## An alternative in neither form adds nothing to the result; a warning is
## written to STDERR so that a typo cannot silently shrink a character set.
##
## No prototype is declared: the definition comes after every call in this
## file, so a prototype would never be applied to those calls anyway.
sub xml_ebnf_to_charlist {
    my ($ebnf) = @_;
    return '' unless defined $ebnf;

    my $charlist = '';
    for my $alt (split /\s*\|\s*/, $ebnf) {
        ## The range form must be tried first: the single-code-point pattern
        ## would also match the start of a range.
        if (my ($first, $last) = $alt =~ /\[\#x([0-9A-Fa-f]+)-\#x([0-9A-Fa-f]+)\]/) {
            $charlist .= uc "!$first $last \n";
        }
        elsif (my ($code) = $alt =~ /#x([0-9A-Fa-f]+)/) {
            $charlist .= uc "!$code\n";
        }
        elsif ($alt =~ /\S/) {
            warn "xml_ebnf_to_charlist: unrecognized EBNF alternative: $alt\n";
        }
    }
    return $charlist;
}

## print_module is expected to come from mkpm.pl (loaded with require at the
## top of the file).  It is called with an explicit empty argument list
## instead of "&print_module;", which would implicitly forward @_.
print_module();

## $Date: 2003/06/15 01:58:58 $
### XML-src.upl ends here