#!/usr/local/bin/perl use utf8; ## This file is written in UTF-8 use strict; require 'mkpm.pl'; use vars qw(%PROP %SET %SET_ALIAS); $PROP{module_name} = 'XML'; $PROP{version} = do{my @r=(q$Revision: 1.3 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; $PROP{author_name} = 'Wakaba'; $PROP{author_mail} = 'w@suika.fam.cx'; $PROP{pod_description} = <. First Edition, 1998-02-10, . "XML 1.0 Specification Errata", Errata for First Edition, . Second Edition, 2000-10-06, . "XML 1.0 Second Edition Specification Errata", Errata for Second Edition, . Third Edition, 2004-02-04, . "XML 1.0 Third Edition Specification Errata", . =item XML Namespace 1.0 "Namespaces in XML", W3C Recommendation, . First Edition, 1999-01-14, . "Namespaces in XML Errata", . =item XML 1.1 "Extensible Markup Language (XML) 1.1", W3C Recommendation, . First Edition, 2004-02-04. Edited 2004-04-15, . "XML 1.1 First Edition Specification Errata", . =item XML Namespace 1.1 "Namespaces in XML 1.1", W3C Recommendation, . First Edition, 2004-04-02, . "Namespaces in XML 1.1 Errata", . =item Misc. "Unicode in XML and other Markup Languages", Unicode Technical Report #20, W3C Note, . This version of this module refers 2003-06-13 version of the W3C Note . 
=back EOH $SET{Char10} = < defined in XML 1.0 spec (#2, ) !0009 !000A !000D !0020 D7FF !E000 FFFD !10000 10FFFF EOH $SET{Char11} = < defined in XML 1.1 spec (#2, ) !0001 D7FF !E000 FFFD !10000 10FFFF EOH $SET_ALIAS{Char} = 'Char11'; $SET{RestrictedChar11} = < defined in XML 1.1 spec (#2a, ) !0001 0008 !000B !000C !000E 001F !007F 0084 !0086 009F EOH $SET_ALIAS{RestrictedChar} = 'RestrictedChar11'; $SET_ALIAS{_UnrestrictedChar10} = 'Char10'; $SET{_UnrestrictedChar11} = < - C !0009 !000A !000D !0020 007E !0085 !00A0 D7FF !E000 FFFD !10000 10FFFF EOH $SET_ALIAS{_UnrestrictedChar} = '_UnrestrictedChar11'; $SET{S} = < defined in XML 1.0 spec (#3, ) !0009 !000A !000D !0020 EOH $SET{BaseChar} = qq(#DESCRIPTION C defined in XML 1.0 spec (#85, ) ).xml_ebnf_to_charlist (< defined in XML 1.0 spec (#86, ) ).xml_ebnf_to_charlist (q([#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029])); $SET{CombiningChar} = qq(#DESCRIPTION C defined in XML 1.0 spec (#87, ) ).xml_ebnf_to_charlist (< defined in XML 1.0 spec (#88, ) ).xml_ebnf_to_charlist (< defined in XML 1.0 spec (#89, ) ).xml_ebnf_to_charlist (< (::= C / C) defined in XML 1.0 spec (#84, ) ); for (split /\n/, $SET{BaseChar} . $SET{Ideographic}) { $SET{Letter} .= $_ . "\n" unless /^\#/; } $SET{NameChar10} = < defined in XML 1.0 spec (#4, ) .-_: EOH $SET{_NameStartChar10} = < defined in XML 1.0 spec (#5, ) _: EOH $SET{NCNameChar10} = < defined in Namespace in XML spec (#5, ) .-_ EOH $SET{_NCNameStartChar10} = < defined in Namespace in XML spec (#4, ) _ EOH for (split /\n/, $SET{Letter}) { $SET{_NameStartChar10} .= $_ . "\n" unless /^\#/; $SET{NameChar10} .= $_ . "\n" unless /^\#/; $SET{_NCNameStartChar10} .= $_ . "\n" unless /^\#/; $SET{NCNameChar10} .= $_ . "\n" unless /^\#/; } for (split /\n/, $SET{Digit} . $SET{CombiningChar} . $SET{Extender}) { $SET{NameChar10} .= $_ . "\n" unless /^\#/; $SET{NCNameChar10} .= $_ . 
"\n" unless /^\#/; } $SET{NameStartChar11} = <<'EOH'; #DESCRIPTION C defined in XML 1.1 spec (#4, ) !00C0 00D6 !00D8 00F6 !00F8 02FF !0370 037D !037F 1FFF !200C 200D !2070 218F !2C00 2FEF !3001 D7FF !F900 FDCF !FDF0 FFFD !10000 EFFFF ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz :_ EOH $SET_ALIAS{NameStartChar} = 'NameStartChar11'; $SET_ALIAS{_NameStartChar} = 'NameStartChar11'; $SET{NCNameStartChar11} = <<'EOH'; #DESCRIPTION C defined in XML Namespace 1.1 spec !00C0 00D6 !00D8 00F6 !00F8 02FF !0370 037D !037F 1FFF !200C 200D !2070 218F !2C00 2FEF !3001 D7FF !F900 FDCF !FDF0 FFFD !10000 EFFFF ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_ EOH $SET_ALIAS{NCNameStartChar} = 'NCNameStartChar11'; $SET_ALIAS{_NCNameStartChar} = 'NCNameStartChar11'; $SET{NameChar11} = <<'EOH'; #DESCRIPTION C defined in XML 1.1 spec (#4a, ) !00C0 00D6 !00D8 00F6 !00F8 02FF !0370 037D !037F 1FFF !200C 200D !2070 218F !2C00 2FEF !3001 D7FF !F900 FDCF !FDF0 FFFD !10000 EFFFF ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz :_ !00B7 !0300 036F !203F 2040 -.0123456789 EOH $SET_ALIAS{NameChar} = 'NameChar11'; $SET{NCNameChar11} = <<'EOH'; #DESCRIPTION C defined in XML Namespace 1.1 spec !00C0 00D6 !00D8 00F6 !00F8 02FF !0370 037D !037F 1FFF !200C 200D !2070 218F !2C00 2FEF !3001 D7FF !F900 FDCF !FDF0 FFFD !10000 EFFFF ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_ !00B7 !0300 036F !203F 2040 -.0123456789 EOH $SET_ALIAS{NCNameChar} = 'NCNameChar11'; ## TODO: XML 1.1 Appendix I $SET{PubidChar} = <<'EOH'; #DESCRIPTION C defined in XML 1.0 spec (#13, ) !000A !000D !0020 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz -'()+,./:=?;!*#@$_% EOH $SET{VersionNum} = <<'EOH'; #DESCRIPTION Characters are elements of C defined in First and Second Editions of XML 1.0 spec (#26, ) 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz -_.: EOH $SET{_deprecated_noncharacter} = <<'EOH'; #DESCRIPTION Additional deprecated characters in XML 1.0 SE errata (E46, ) and 
TE ("noncharacter" in Unicode)
!007F 0084
!0086 009F
!FDD0 FDDF
!1FFFE 1FFFF
!2FFFE 2FFFF
!3FFFE 3FFFF
!4FFFE 4FFFF
!5FFFE 5FFFF
!6FFFE 6FFFF
!7FFFE 7FFFF
!8FFFE 8FFFF
!9FFFE 9FFFF
!AFFFE AFFFF
!BFFFE BFFFF
!CFFFE CFFFF
!DFFFE DFFFF
!EFFFE EFFFF
!FFFFE FFFFF
!10FFFE 10FFFF
EOH
## NOTE: the third range of the table that ends just above is
## U+FDD0..U+FDDF, as listed in the "discouraged characters" note of
## the XML 1.0 / XML 1.1 specifications.  It used to read "!FD00 FD0F",
## which selected assigned Arabic presentation forms instead.

## Characters that are not suitable for use with markup.  Entries are
## either a single code point ("!XXXX") or an inclusive range
## ("!XXXX YYYY"), in hexadecimal.
$SET{_unicode_xml_not_suitable} = <<'EOH';
#DESCRIPTION Characters not suitable for use with markup (Table 3.1 of )
!2028 202E
!206A 206F
!FEFF
!FFF9 FFFC
!1D173 1D17A
!E0000 E007F
EOH

## Format-affecting characters that are nevertheless suitable for use
## with markup.  Same entry syntax as the table above.
$SET{_unicode_xml_suitable_format_character} = <<'EOH';
#DESCRIPTION Some characters that affect text format but are suitable for use with markup (Table 4.1 of )
!00A0
!00AD
!0363
!0600 0603
!06DD
!070C
!0F0C
!180B 180E
!200C 200F
!2011
!202F
!2044
!2060 2063
!2FF0 2FFB
!303E
!FE00 FE0F
!E0100 E01DF
EOH

## xml_ebnf_to_charlist ($ebnf)
##
## Converts a character-class expression written in the EBNF notation
## of the XML specifications into the list syntax used by the $SET{...}
## tables of this file.
##
##   $ebnf   - Alternatives separated by "|".  Each alternative is
##             either a range "[#xHHHH-#xHHHH]" or a single code point
##             "#xHHHH".
##
## Returns a string with one line per recognized alternative:
## "!START END \n" for a range and "!CODE\n" for a single code point,
## upper-cased.  Alternatives matching neither form are silently
## skipped.  An undefined argument yields the empty string.
##
## No prototype is declared: every call in this file precedes the
## definition, so a prototype could never be applied to them.
sub xml_ebnf_to_charlist {
    my $ebnf = shift;
    return '' unless defined $ebnf;

    my $r = '';
    for my $p (split /\s*\|\s*/, $ebnf) {
        if ($p =~ /\[\#x([0-9A-Fa-f]+)-\#x([0-9A-Fa-f]+)\]/) {
            ## Range: keep both end points on one line.
            $r .= uc "!$1 $2 \n";
        } elsif ($p =~ /#x([0-9A-Fa-f]+)/) {
            ## Single code point.
            $r .= uc "!$1\n";
        }
    }
    return $r;
}

## print_module is not defined in this file -- presumably it is provided
## by the 'mkpm.pl' file required at the top; TODO confirm.  It is called
## with an explicit empty argument list instead of the "&name;" form,
## which would implicitly share the current @_.
print_module();

## $Date: 2004/06/04 08:29:56 $
### XML-src.upl ends here