1 |
wakaba |
1.1 |
#!/usr/local/bin/perl
|
2 |
|
|
use utf8; ## This file is written in UTF-8
|
3 |
|
|
use strict;
|
4 |
|
|
require 'mkpm.pl';
|
5 |
|
|
use vars qw(%PROP %SET %SET_ALIAS);
|
6 |
|
|
$PROP{module_name} = 'XML';
|
7 |
wakaba |
1.3 |
## Derive the module version from the CVS/RCS Revision keyword: every run of
## digits is one component; the first is printed as-is followed by a dot and
## each later one is zero-padded to two digits (revision 1.2 yields "1.02").
$PROP{version} = do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
|
8 |
wakaba |
1.1 |
$PROP{author_name} = 'Wakaba';
|
9 |
|
|
$PROP{author_mail} = 'w@suika.fam.cx';
|
10 |
|
|
|
11 |
wakaba |
1.2 |
$PROP{pod_description} = <<EOH;
|
12 |
|
|
Character classes for XML, the Extensible Markup Language
|
13 |
|
|
EOH
|
14 |
|
|
|
15 |
|
|
$PROP{pod_see_also} = <<EOH;
|
16 |
wakaba |
1.3 |
|
17 |
|
|
=over 2
|
18 |
|
|
|
19 |
|
|
=item XML 1.0
|
20 |
|
|
|
21 |
wakaba |
1.2 |
"Extensible Markup Language (XML) 1.0", W3C Recommendation,
|
22 |
wakaba |
1.3 |
<http://www.w3.org/TR/REC-xml>.
|
23 |
|
|
|
24 |
|
|
First Edition, 1998-02-10,
|
25 |
wakaba |
1.2 |
<http://www.w3.org/TR/1998/REC-xml-19980210>.
|
26 |
wakaba |
1.3 |
|
27 |
|
|
"XML 1.0 Specification Errata", Errata for First Edition,
|
28 |
|
|
<http://www.w3.org/XML/xml-19980210-errata>.
|
29 |
|
|
|
30 |
wakaba |
1.2 |
Second Edition, 2000-10-06, <http://www.w3.org/TR/2000/REC-xml-20001006>.
|
31 |
|
|
|
32 |
wakaba |
1.3 |
"XML 1.0 Second Edition Specification Errata", Errata for Second Edition,
|
33 |
|
|
<http://www.w3.org/XML/xml-V10-2e-errata>.
|
34 |
|
|
|
35 |
|
|
Third Edition, 2004-02-04, <http://www.w3.org/TR/2004/REC-xml-20040204>.
|
36 |
wakaba |
1.2 |
|
37 |
wakaba |
1.3 |
"XML 1.0 Third Edition Specification Errata",
|
38 |
|
|
<http://www.w3.org/XML/xml-V10-3e-errata>.
|
39 |
|
|
|
40 |
|
|
=item XML Namespace 1.0
|
41 |
wakaba |
1.2 |
|
42 |
|
|
"Namespaces in XML", W3C Recommendation, <http://www.w3.org/TR/REC-xml-names>.
|
43 |
wakaba |
1.3 |
|
44 |
wakaba |
1.2 |
First Edition, 1999-01-14, <http://www.w3.org/TR/1999/REC-xml-names-19990114>.
|
45 |
|
|
|
46 |
|
|
"Namespaces in XML Errata", <http://www.w3.org/XML/xml-names-19990114-errata>.
|
47 |
|
|
|
48 |
wakaba |
1.3 |
=item XML 1.1
|
49 |
|
|
|
50 |
|
|
"Extensible Markup Language (XML) 1.1", W3C Recommendation,
|
51 |
|
|
<http://www.w3.org/TR/xml11>.
|
52 |
|
|
|
53 |
|
|
First Edition, 2004-02-04. Edited 2004-04-15,
|
54 |
|
|
<http://www.w3.org/TR/2004/REC-xml11-20040204/>.
|
55 |
|
|
|
56 |
|
|
"XML 1.1 First Edition Specification Errata",
|
57 |
|
|
<http://www.w3.org/XML/xml-V11-1e-errata>.
|
58 |
|
|
|
59 |
|
|
=item XML Namespace 1.1
|
60 |
|
|
|
61 |
|
|
"Namespaces in XML 1.1", W3C Recommendation,
|
62 |
|
|
<http://www.w3.org/TR/xml-names11>.
|
63 |
|
|
|
64 |
|
|
First Edition, 2004-02-04,
|
65 |
|
|
<http://www.w3.org/TR/2004/REC-xml-names11-20040204>.
|
66 |
|
|
|
67 |
|
|
"Namespaces in XML 1.1 Errata",
|
68 |
|
|
<http://www.w3.org/XML/2004/xml-names11-errata>.
|
69 |
|
|
|
70 |
|
|
=item Misc.
|
71 |
|
|
|
72 |
wakaba |
1.2 |
"Unicode in XML and other Markup Languages", Unicode Technical Report #20,
|
73 |
|
|
W3C Note, <http://www.w3.org/TR/unicode-xml/>. This version of this module
|
74 |
|
|
refers to the 2003-06-13 version of the W3C Note
|
75 |
|
|
<http://www.w3.org/TR/2003/NOTE-unicode-xml-20030613/>.
|
76 |
wakaba |
1.3 |
|
77 |
|
|
=back
|
78 |
|
|
|
79 |
|
|
EOH
|
80 |
|
|
|
81 |
|
|
## C<Char> of XML 1.0 (production 2).
## The #DESCRIPTION entry is kept on ONE physical line.  It used to be broken
## in the middle of the URL by a backslash + newline; inside this interpolating
## here-document that yields a real line break, so the description ended at
## "#N" and "T-Char>)" became a separate line of the set definition.
$SET{Char10} = <<EOH;
#DESCRIPTION C<Char> defined in XML 1.0 spec (#2, <http://www.w3.org/TR/REC-xml#NT-Char>)
!0009
!000A
!000D
!0020 D7FF
!E000 FFFD
!10000 10FFFF
EOH
|
91 |
|
|
|
92 |
|
|
$SET{Char11} = <<EOH;
|
93 |
|
|
#DESCRIPTION C<Char> defined in XML 1.1 spec (#2, <http://www.w3.org/TR/xml11#NT-Char>)
|
94 |
|
|
!0001 D7FF
|
95 |
|
|
!E000 FFFD
|
96 |
|
|
!10000 10FFFF
|
97 |
wakaba |
1.2 |
EOH
|
98 |
|
|
|
99 |
wakaba |
1.3 |
$SET_ALIAS{Char} = 'Char11';
|
100 |
|
|
|
101 |
|
|
$SET{RestrictedChar11} = <<EOH;
|
102 |
|
|
#DESCRIPTION C<RestrictedChar> defined in XML 1.1 spec (#2a, <http://www.w3.org/TR/xml11#NT-RestrictedChar>)
|
103 |
|
|
!0001 0008
|
104 |
|
|
!000B
|
105 |
|
|
!000C
|
106 |
|
|
!000E 001F
|
107 |
|
|
!007F 0084
|
108 |
|
|
!0086 009F
|
109 |
|
|
EOH
|
110 |
|
|
|
111 |
|
|
$SET_ALIAS{RestrictedChar} = 'RestrictedChar11';
|
112 |
|
|
|
113 |
|
|
$SET_ALIAS{_UnrestrictedChar10} = 'Char10';
|
114 |
|
|
|
115 |
|
|
$SET{_UnrestrictedChar11} = <<EOH;
|
116 |
|
|
#DESCRIPTION C<Char11> - C<RestrictedChar11>
|
117 |
wakaba |
1.1 |
!0009
|
118 |
|
|
!000A
|
119 |
|
|
!000D
|
120 |
wakaba |
1.3 |
!0020 007E
|
121 |
|
|
!0085
|
122 |
|
|
!00A0 D7FF
|
123 |
wakaba |
1.1 |
!E000 FFFD
|
124 |
|
|
!10000 10FFFF
|
125 |
|
|
EOH
|
126 |
|
|
|
127 |
wakaba |
1.3 |
$SET_ALIAS{_UnrestrictedChar} = '_UnrestrictedChar11';
|
128 |
|
|
|
129 |
wakaba |
1.1 |
$SET{S} = <<EOH;
|
130 |
|
|
#DESCRIPTION Characters are elements of C<S> defined in XML 1.0 spec (#3, <http://www.w3.org/TR/REC-xml#NT-S>)
|
131 |
|
|
!0009
|
132 |
|
|
!000A
|
133 |
|
|
!000D
|
134 |
|
|
!0020
|
135 |
|
|
EOH
|
136 |
|
|
|
137 |
|
|
$SET{BaseChar} = qq(#DESCRIPTION C<BaseChar> defined in XML 1.0 spec (#85, <http://www.w3.org/TR/REC-xml#NT-BaseChar>)
|
138 |
|
|
).xml_ebnf_to_charlist (<<EOH);
|
139 |
|
|
[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | 
[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]
|
140 |
|
|
EOH
|
141 |
|
|
|
142 |
|
|
$SET{Ideographic} = qq(#DESCRIPTION C<Ideographic> defined in XML 1.0 spec (#86, <http://www.w3.org/TR/REC-xml#NT-Ideographic>)
|
143 |
|
|
).xml_ebnf_to_charlist (q([#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]));
|
144 |
|
|
|
145 |
|
|
$SET{CombiningChar} = qq(#DESCRIPTION C<CombiningChar> defined in XML 1.0 spec (#87, <http://www.w3.org/TR/REC-xml#NT-CombiningChar>)
|
146 |
|
|
).xml_ebnf_to_charlist (<<EOH);
|
147 |
|
|
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A
|
148 |
|
|
EOH
|
149 |
|
|
|
150 |
|
|
$SET{Digit} = qq(#DESCRIPTION C<Digit> defined in XML 1.0 spec (#88, <http://www.w3.org/TR/REC-xml#NT-Digit>)
|
151 |
|
|
).xml_ebnf_to_charlist (<<EOH);
|
152 |
|
|
[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
|
153 |
|
|
EOH
|
154 |
|
|
|
155 |
|
|
$SET{Extender} = qq(#DESCRIPTION C<Extender> defined in XML 1.0 spec (#89, <http://www.w3.org/TR/REC-xml#NT-Extender>)
|
156 |
|
|
).xml_ebnf_to_charlist (<<EOH);
|
157 |
|
|
#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
|
158 |
|
|
EOH
|
159 |
|
|
|
160 |
|
|
## Letter ::= BaseChar | Ideographic (XML 1.0 production 84).
## The set is its own description line followed by every entry of the two
## component sets; lines starting with "#" (the components' own headers) are
## left out.  The trailing empty element makes the result end in a newline.
$SET{Letter} = join "\n",
  '#DESCRIPTION C<Letter> (::= C<BaseChar> / C<Ideographic>) defined in XML 1.0 spec (#84, <http://www.w3.org/TR/REC-xml#NT-Letter>)',
  (grep { not /^\#/ } split /\n/, $SET{BaseChar} . $SET{Ideographic}),
  '';
|
165 |
|
|
|
166 |
wakaba |
1.3 |
## Name character sets of XML 1.0 and Namespaces in XML 1.0.
## Every set consists of its description, the ASCII punctuation it allows
## literally, and then the entries copied from other sets:
##   - all four sets receive the entries of Letter;
##   - NameChar10 and NCNameChar10 additionally receive Digit, CombiningChar
##     and Extender.
## Lines starting with "#" in the copied sets are never carried over.
{
  ## Returns the non-"#" lines of a set definition, each newline-terminated.
  my $own_entries = sub {
    return join '', map { "$_\n" } grep { not /^\#/ } split /\n/, $_[0];
  };
  my $letters = $own_entries->($SET{Letter});
  my $others = $own_entries->($SET{Digit} . $SET{CombiningChar} . $SET{Extender});

  $SET{NameChar10} = <<EOH . $letters . $others;
#DESCRIPTION Characters are elements of C<NameChar> defined in XML 1.0 spec (#4, <http://www.w3.org/TR/REC-xml#NT-NameChar>)
.-_:
EOH
  $SET{_NameStartChar10} = <<EOH . $letters;
#DESCRIPTION Characters can be the first char of C<Name> defined in XML 1.0 spec (#5, <http://www.w3.org/TR/REC-xml#NT-Name>)
_:
EOH
  $SET{NCNameChar10} = <<EOH . $letters . $others;
#DESCRIPTION Characters are elements of C<NCNameChar> defined in Namespace in XML spec (#5, <http://www.w3.org/TR/REC-xml-names#NT-NCNameChar>)
.-_
EOH
  $SET{_NCNameStartChar10} = <<EOH . $letters;
#DESCRIPTION Characters can be the first char of C<NCName> defined in Namespace in XML spec (#4, <http://www.w3.org/TR/REC-xml-names#NT-NCName>)
_
EOH
}
|
192 |
|
|
|
193 |
wakaba |
1.3 |
$SET{NameStartChar11} = <<'EOH';
|
194 |
|
|
#DESCRIPTION C<NameStartChar> defined in XML 1.1 spec (#4, <http://www.w3.org/TR/xml11#NT-NameStartChar>)
|
195 |
|
|
!00C0 00D6
|
196 |
|
|
!00D8 00F6
|
197 |
|
|
!00F8 02FF
|
198 |
|
|
!0370 037D
|
199 |
|
|
!037F 1FFF
|
200 |
|
|
!200C 200D
|
201 |
|
|
!2070 218F
|
202 |
|
|
!2C00 2FEF
|
203 |
|
|
!3001 D7FF
|
204 |
|
|
!F900 FDCF
|
205 |
|
|
!FDF0 FFFD
|
206 |
|
|
!10000 EFFFF
|
207 |
|
|
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
208 |
|
|
:_
|
209 |
|
|
EOH
|
210 |
|
|
|
211 |
|
|
$SET_ALIAS{NameStartChar} = 'NameStartChar11';
|
212 |
|
|
$SET_ALIAS{_NameStartChar} = 'NameStartChar11';
|
213 |
|
|
|
214 |
|
|
|
215 |
|
|
$SET{NCNameStartChar11} = <<'EOH';
|
216 |
|
|
#DESCRIPTION C<NCNameStartChar> defined in XML Namespace 1.1 spec
|
217 |
|
|
!00C0 00D6
|
218 |
|
|
!00D8 00F6
|
219 |
|
|
!00F8 02FF
|
220 |
|
|
!0370 037D
|
221 |
|
|
!037F 1FFF
|
222 |
|
|
!200C 200D
|
223 |
|
|
!2070 218F
|
224 |
|
|
!2C00 2FEF
|
225 |
|
|
!3001 D7FF
|
226 |
|
|
!F900 FDCF
|
227 |
|
|
!FDF0 FFFD
|
228 |
|
|
!10000 EFFFF
|
229 |
|
|
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_
|
230 |
|
|
EOH
|
231 |
|
|
|
232 |
|
|
$SET_ALIAS{NCNameStartChar} = 'NCNameStartChar11';
|
233 |
|
|
$SET_ALIAS{_NCNameStartChar} = 'NCNameStartChar11';
|
234 |
|
|
|
235 |
|
|
$SET{NameChar11} = <<'EOH';
|
236 |
|
|
#DESCRIPTION C<NameChar> defined in XML 1.1 spec (#4a, <http://www.w3.org/TR/xml11#NT-NameChar>)
|
237 |
|
|
!00C0 00D6
|
238 |
|
|
!00D8 00F6
|
239 |
|
|
!00F8 02FF
|
240 |
|
|
!0370 037D
|
241 |
|
|
!037F 1FFF
|
242 |
|
|
!200C 200D
|
243 |
|
|
!2070 218F
|
244 |
|
|
!2C00 2FEF
|
245 |
|
|
!3001 D7FF
|
246 |
|
|
!F900 FDCF
|
247 |
|
|
!FDF0 FFFD
|
248 |
|
|
!10000 EFFFF
|
249 |
|
|
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
250 |
|
|
:_
|
251 |
|
|
|
252 |
|
|
!00B7
|
253 |
|
|
!0300 036F
|
254 |
|
|
!203F 2040
|
255 |
|
|
-.0123456789
|
256 |
|
|
EOH
|
257 |
|
|
|
258 |
|
|
$SET_ALIAS{NameChar} = 'NameChar11';
|
259 |
|
|
|
260 |
|
|
$SET{NCNameChar11} = <<'EOH';
|
261 |
|
|
#DESCRIPTION C<NCNameChar> defined in XML Namespace 1.1 spec
|
262 |
|
|
!00C0 00D6
|
263 |
|
|
!00D8 00F6
|
264 |
|
|
!00F8 02FF
|
265 |
|
|
!0370 037D
|
266 |
|
|
!037F 1FFF
|
267 |
|
|
!200C 200D
|
268 |
|
|
!2070 218F
|
269 |
|
|
!2C00 2FEF
|
270 |
|
|
!3001 D7FF
|
271 |
|
|
!F900 FDCF
|
272 |
|
|
!FDF0 FFFD
|
273 |
|
|
!10000 EFFFF
|
274 |
|
|
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_
|
275 |
|
|
|
276 |
|
|
!00B7
|
277 |
|
|
!0300 036F
|
278 |
|
|
!203F 2040
|
279 |
|
|
-.0123456789
|
280 |
|
|
EOH
|
281 |
|
|
|
282 |
|
|
$SET_ALIAS{NCNameChar} = 'NCNameChar11';
|
283 |
|
|
|
284 |
|
|
## TODO: XML 1.1 Appendix I
|
285 |
wakaba |
1.1 |
|
286 |
|
|
$SET{PubidChar} = <<'EOH';
|
287 |
|
|
#DESCRIPTION C<PubidChar> defined in XML 1.0 spec (#13, <http://www.w3.org/TR/REC-xml#NT-PubidChar>)
|
288 |
|
|
!000A
|
289 |
|
|
!000D
|
290 |
|
|
!0020
|
291 |
|
|
0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
292 |
|
|
-'()+,./:=?;!*#@$_%
|
293 |
|
|
EOH
|
294 |
|
|
|
295 |
|
|
$SET{VersionNum} = <<'EOH';
|
296 |
wakaba |
1.3 |
#DESCRIPTION Characters are elements of C<VersionNum> defined in First and Second Editions of XML 1.0 spec (#26, <http://www.w3.org/TR/2000/REC-xml-20001006#NT-VersionNum>)
|
297 |
wakaba |
1.1 |
0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
298 |
|
|
-_.:
|
299 |
|
|
EOH
|
300 |
|
|
|
301 |
wakaba |
1.2 |
## Characters discouraged by XML 1.0 SE erratum E46 / XML 1.0 TE section 2.2:
## C1 controls (except U+0085), the noncharacter range in the Arabic
## presentation forms area, and the last two code points of planes 1 to 16.
## The third entry used to read "FD00 FD0F", which are ordinary assigned
## characters; the erratum gives [#xFDD0-#xFDDF].
## NOTE(review): Unicode defines U+FDD0..U+FDEF as noncharacters; the narrower
## FDDF bound is kept here to follow the cited text -- confirm which is wanted.
$SET{_deprecated_noncharacter} = <<'EOH';
#DESCRIPTION Additional deprecated characters in XML 1.0 SE errata (E46, <http://www.w3.org/XML/xml-V10-2e-errata#E46>) and TE <http://www.w3.org/TR/REC-xml/#charsets> ("noncharacter" in Unicode)
!007F 0084
!0086 009F
!FDD0 FDDF
!1FFFE 1FFFF
!2FFFE 2FFFF
!3FFFE 3FFFF
!4FFFE 4FFFF
!5FFFE 5FFFF
!6FFFE 6FFFF
!7FFFE 7FFFF
!8FFFE 8FFFF
!9FFFE 9FFFF
!AFFFE AFFFF
!BFFFE BFFFF
!CFFFE CFFFF
!DFFFE DFFFF
!EFFFE EFFFF
!FFFFE FFFFF
!10FFFE 10FFFF
EOH
|
323 |
|
|
|
324 |
|
|
$SET{_unicode_xml_not_suitable} = <<'EOH';
|
325 |
|
|
#DESCRIPTION Characters not suitable for use with markup (Table 3.1 of <http://www.w3.org/TR/unicode-xml/#Charlist>)
|
326 |
|
|
!2028 202E
|
327 |
|
|
!206A 206F
|
328 |
|
|
!FEFF
|
329 |
|
|
!FFF9 FFFC
|
330 |
|
|
!1D173 1D17A
|
331 |
|
|
!E0000 E007F
|
332 |
|
|
EOH
|
333 |
|
|
|
334 |
|
|
## Format-affecting characters that are nevertheless suitable for use with
## markup (Table 4.1 of "Unicode in XML and other Markup Languages").
## Three entries were corrected to the code points of the characters the
## table names:
##   034F  COMBINING GRAPHEME JOINER          (was 0363)
##   070F  SYRIAC ABBREVIATION MARK           (was 070C)
##   E0100 E01EF  variation selectors 17-256  (range used to end at E01DF)
## NOTE(review): corrections follow the Unicode code charts; please confirm
## against the 2003-06-13 revision of the W3C Note before release.
$SET{_unicode_xml_suitable_format_character} = <<'EOH';
#DESCRIPTION Some characters that affect text format but are suitable for use with markup (Table 4.1 of <http://www.w3.org/TR/unicode-xml/#Charlist>)
!00A0
!00AD
!034F
!0600 0603
!06DD
!070F
!0F0C
!180B 180E
!200C 200F
!2011
!202F
!2044
!2060 2063
!2FF0 2FFB
!303E
!FE00 FE0F
!E0100 E01EF
EOH
|
354 |
|
|
|
355 |
wakaba |
1.1 |
## xml_ebnf_to_charlist ($ebnf)
##
## Converts the right-hand side of an XML-spec EBNF character production
## (alternatives separated by "|") into the line-based list used for the
## %SET values in this file:
##
##   [#xHHHH-#xHHHH]  becomes  "!HHHH HHHH \n"  (note the space before \n)
##   #xHHHH           becomes  "!HHHH\n"
##
## The whole emitted line is upper-cased.  Alternatives matching neither
## form contribute nothing.  Returns the concatenated lines ('' if none).
sub xml_ebnf_to_charlist ($) {
  my ($ebnf) = @_;
  my $hex = qr/[0-9A-Fa-f]+/;
  my @entries;
  for my $term (split /\s*\|\s*/, $ebnf) {
    if (my ($first, $last) = $term =~ /\[\#x($hex)-\#x($hex)\]/) {
      push @entries, uc "!$first $last \n";
    } elsif (my ($code) = $term =~ /#x($hex)/) {
      push @entries, uc "!$code\n";
    }
  }
  return join '', @entries;
}
|
366 |
|
|
|
367 |
|
|
## Hand the collected %PROP / %SET / %SET_ALIAS data over to the generator.
## print_module is not defined in this file (presumably it comes from the
## mkpm.pl loaded by the require at the top -- confirm).  It is called with an
## explicit empty argument list: the former "&print_module;" form implicitly
## forwarded whatever happened to be in the current @_.
print_module ();
|
368 |
|
|
|
369 |
wakaba |
1.3 |
## $Date: 2003/06/15 01:58:58 $
|
370 |
wakaba |
1.1 |
### XML-src.upl ends here
|