## Message::Charset::Info - a table of charset names and the flags
## that describe each name.
package Message::Charset::Info;
use strict;
use warnings;

## The module version is derived from the CVS revision keyword: the
## first number is used as is and each following number is zero-padded
## to two digits.
our $VERSION=do{my @r=(q$Revision: 1.19 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
## Bit flags that classify a charset name.  Each flag is a constant
## sub with an empty prototype; flags are combined with bitwise OR in
## the |iana_names| tables.

sub UNREGISTERED_CHARSET_NAME () { 0b0001 }

sub REGISTERED_CHARSET_NAME () { 0b0010 }

## "Name:" field for IANA names.  Includes the
## REGISTERED_CHARSET_NAME bit.
sub PRIMARY_CHARSET_NAME () { 0b0100 | REGISTERED_CHARSET_NAME }

## "preferred MIME name" for IANA names.  Includes the
## REGISTERED_CHARSET_NAME bit.
sub PREFERRED_CHARSET_NAME () { 0b1000 | REGISTERED_CHARSET_NAME }
## Fields of a charset description (a hash reference):
##
## iana_names
## is_html_ascii_superset: "superset of US-ASCII (specifically,
##   ANSI_X3.4-1968) for bytes in the range 0x09 - 0x0D, 0x20, 0x21,
##   0x22, 0x26, 0x27, 0x2C - 0x3F, 0x41 - 0x5A, and 0x61 - 0x7A"
##   [HTML5]
## is_ebcdic_based

## ISSUE: Is Shift_JIS a superset of US-ASCII?  Is ISO-2022-JP?
## ISSUE: Should 0x5F (_) be added to the range?
## $Charset is a file-private table holding one key per charset.
## $IANACharset is a package variable that indexes the same
## descriptions by every name found in their |iana_names|.  All keys
## are written in lowercase, and every key of one charset refers to
## the same hash reference.
my $Charset;

our $IANACharset;

## Each element below is a [key for $Charset, description] pair.  The
## $IANACharset keys are taken from |iana_names| instead of being
## listed a second time, so the two sets of names cannot drift apart.
for my $definition (
    ['us-ascii' => {
        iana_names => {
            'ansi_x3.4-1968' => PRIMARY_CHARSET_NAME,
            'iso-ir-6' => REGISTERED_CHARSET_NAME,
            'ansi_x3.4-1986' => REGISTERED_CHARSET_NAME,
            'iso_646.irv:1991' => REGISTERED_CHARSET_NAME,
            'ascii' => REGISTERED_CHARSET_NAME,
            'iso646-us' => REGISTERED_CHARSET_NAME,
            'us-ascii' => PREFERRED_CHARSET_NAME,
            'us' => REGISTERED_CHARSET_NAME,
            'ibm367' => REGISTERED_CHARSET_NAME,
            'cp367' => REGISTERED_CHARSET_NAME,
            'csascii' => REGISTERED_CHARSET_NAME,
        },
        is_html_ascii_superset => 1,
    }],

    ['iso-8859-1' => {
        iana_names => {
            'iso_8859-1:1987' => PRIMARY_CHARSET_NAME,
            'iso-ir-100' => REGISTERED_CHARSET_NAME,
            'iso_8859-1' => REGISTERED_CHARSET_NAME,
            'iso-8859-1' => PREFERRED_CHARSET_NAME,
            'latin1' => REGISTERED_CHARSET_NAME,
            'l1' => REGISTERED_CHARSET_NAME,
            'ibm819' => REGISTERED_CHARSET_NAME,
            'cp819' => REGISTERED_CHARSET_NAME,
            'csisolatin1' => REGISTERED_CHARSET_NAME,
        },
        is_html_ascii_superset => 1,
    }],

    ## TODO: other names..

    ['shift_jis' => {
        iana_names => {
            'shift_jis' => PREFERRED_CHARSET_NAME | PRIMARY_CHARSET_NAME,
            'ms_kanji' => REGISTERED_CHARSET_NAME,
            'csshiftjis' => REGISTERED_CHARSET_NAME,
        },
    }],

    ['euc-jp' => {
        iana_names => {
            'extended_unix_code_packed_format_for_japanese' => PRIMARY_CHARSET_NAME,
            'cseucpkdfmtjapanese' => REGISTERED_CHARSET_NAME,
            'euc-jp' => PREFERRED_CHARSET_NAME,
        },
        is_html_ascii_superset => 1,
    }],

    ## TODO: ...

    ['iso-2022-jp' => {
        iana_names => {
            'iso-2022-jp' => PREFERRED_CHARSET_NAME | PRIMARY_CHARSET_NAME,
            'csiso2022jp' => REGISTERED_CHARSET_NAME,
        },
    }],

    ## TODO: ...

    ['utf-8' => {
        iana_names => {
            'utf-8' => PRIMARY_CHARSET_NAME,
        },
        is_html_ascii_superset => 1,
    }],

    ## TODO: ...

    ['utf-16be' => {
        iana_names => {
            'utf-16be' => PRIMARY_CHARSET_NAME,
        },
    }],

    ['utf-16le' => {
        iana_names => {
            'utf-16le' => PRIMARY_CHARSET_NAME,
        },
    }],

    ['utf-16' => {
        iana_names => {
            'utf-16' => PRIMARY_CHARSET_NAME,
        },
    }],

    ## TODO: ...

    ['windows-1252' => {
        iana_names => {
            'windows-1252' => PRIMARY_CHARSET_NAME,
        },
        is_html_ascii_superset => 1,
    }],
) {
    my ($key, $info) = @$definition;

    ## Store the same reference everywhere so that all names of a
    ## charset share a single description.
    $Charset->{$key} = $info;
    $IANACharset->{$_} = $info for keys %{$info->{iana_names}};
}

## TODO: ...
## Tests whether $name has the syntax accepted here for an IANA
## charset name: 1 to 40 characters, each in the range 0x20 - 0x7E.
##
## Returns a single boolean value in both scalar and list context
## (a bare pattern match would return an empty list on failure in
## list context).  An undefined $name is reported as false.
##
## The "($)" prototype is kept so that existing call sites parse
## exactly as before.
sub is_syntactically_iana_charset_name ($) {
    my $name = shift;
    return !!(defined $name && $name =~ /\A[\x20-\x7E]{1,40}\z/);
} # is_syntactically_iana_charset_name

1;
## $Date:$