/[suikacvs]/messaging/manakai/lib/Message/Charset/Info.pm
Suika

Contents of /messaging/manakai/lib/Message/Charset/Info.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Sun Nov 18 11:08:40 2007 UTC (17 years ago) by wakaba
Branch: MAIN
++ manakai/lib/Message/ChangeLog	18 Nov 2007 05:58:46 -0000
2007-11-18  Wakaba  <wakaba@suika.fam.cx>

	* Charset/: New directory.

++ manakai/lib/Message/DOM/ChangeLog	18 Nov 2007 08:56:34 -0000
2007-11-18  Wakaba  <wakaba@suika.fam.cx>

	* Document.pm, Entity.pm (manakai_has_bom,
	manakai_charset): New attributes.

++ manakai/lib/Message/Charset/ChangeLog	18 Nov 2007 11:08:08 -0000
2007-11-18  Wakaba  <wakaba@suika.fam.cx>

	* Info.pm: New Perl module.

2007-11-18  Wakaba  <wakaba@suika.fam.cx>

	* ChangeLog: New file.


++ manakai/t/ChangeLog	18 Nov 2007 08:41:50 -0000
2007-11-18  Wakaba  <wakaba@suika.fam.cx>

	* DOM-Document.t, DOM-Entity.t: New tests for |manakai_has_bom|
	attribute.

1 wakaba 1.1 package Message::Charset::Info;
2     use strict;
3     our $VERSION=do{my @r=(q$Revision: 1.19 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4    
## Bit flags stored as the values of a charset's |iana_names| hash.
## The flags are combined with |; PRIMARY and PREFERRED each include
## the REGISTERED bit.

## Bit 0.
sub UNREGISTERED_CHARSET_NAME () { 1 << 0 }

## Bit 1.
sub REGISTERED_CHARSET_NAME () { 1 << 1 }

## Bit 2 plus the REGISTERED bit: "Name:" field for IANA names.
sub PRIMARY_CHARSET_NAME () { REGISTERED_CHARSET_NAME | (1 << 2) }

## Bit 3 plus the REGISTERED bit: "preferred MIME name" for IANA names.
sub PREFERRED_CHARSET_NAME () { REGISTERED_CHARSET_NAME | (1 << 3) }
11    
12     ## iana_names
13     ## is_html_ascii_superset: "superset of US-ASCII (specifically, ANSI_X3.4-1968)
14     ## for bytes in the range 0x09 - 0x0D, 0x20, 0x21, 0x22, 0x26, 0x27,
15     ## 0x2C - 0x3F, 0x41 - 0x5A, and 0x61 - 0x7A" [HTML5]
16     ## is_ebcdic_based
17    
18     ## ISSUE: Is Shift_JIS a superset of US-ASCII?  Is ISO-2022-JP?
19     ## ISSUE: 0x5F (_) should be added to the range?
20    
## File-lexical table: lower-case charset label (e.g. 'us-ascii') =>
## charset information hash.
my $Charset;

## Package table: lower-case IANA name or alias => the same charset
## information hash that $Charset holds for that charset.
our $IANACharset;

## US-ASCII.  One information hash is shared by the $Charset entry and
## by every $IANACharset entry for this charset.
{
    my $info = {
        iana_names => {
            'ansi_x3.4-1968'   => PRIMARY_CHARSET_NAME,
            'iso-ir-6'         => REGISTERED_CHARSET_NAME,
            'ansi_x3.4-1986'   => REGISTERED_CHARSET_NAME,
            'iso_646.irv:1991' => REGISTERED_CHARSET_NAME,
            'ascii'            => REGISTERED_CHARSET_NAME,
            'iso646-us'        => REGISTERED_CHARSET_NAME,
            'us-ascii'         => PREFERRED_CHARSET_NAME,
            'us'               => REGISTERED_CHARSET_NAME,
            'ibm367'           => REGISTERED_CHARSET_NAME,
            'cp367'            => REGISTERED_CHARSET_NAME,
            'csascii'          => REGISTERED_CHARSET_NAME,
        },
        is_html_ascii_superset => 1,
    };

    $Charset->{'us-ascii'} = $info;

    ## Every name listed in |iana_names| is also a lookup key.
    $IANACharset->{$_} = $info for keys %{ $info->{iana_names} };
}
53    
## ISO-8859-1.  One information hash is shared by the $Charset entry
## and by every $IANACharset entry for this charset.
{
    my $info = {
        iana_names => {
            'iso_8859-1:1987' => PRIMARY_CHARSET_NAME,
            'iso-ir-100'      => REGISTERED_CHARSET_NAME,
            'iso_8859-1'      => REGISTERED_CHARSET_NAME,
            'iso-8859-1'      => PREFERRED_CHARSET_NAME,
            'latin1'          => REGISTERED_CHARSET_NAME,
            'l1'              => REGISTERED_CHARSET_NAME,
            'ibm819'          => REGISTERED_CHARSET_NAME,
            'cp819'           => REGISTERED_CHARSET_NAME,
            'csisolatin1'     => REGISTERED_CHARSET_NAME,
        },
        is_html_ascii_superset => 1,
    };

    $Charset->{'iso-8859-1'} = $info;

    ## Every name listed in |iana_names| is also a lookup key.
    $IANACharset->{$_} = $info for keys %{ $info->{iana_names} };
}
78    
79     ## TODO: other names..
80    
## Shift_JIS.  One information hash is shared by the $Charset entry and
## by every $IANACharset entry for this charset.  No
## |is_html_ascii_superset| flag is set for it.
{
    my $info = {
        iana_names => {
            'shift_jis'  => PREFERRED_CHARSET_NAME | PRIMARY_CHARSET_NAME,
            'ms_kanji'   => REGISTERED_CHARSET_NAME,
            'csshiftjis' => REGISTERED_CHARSET_NAME,
        },
    };

    $Charset->{'shift_jis'} = $info;

    ## Every name listed in |iana_names| is also a lookup key.
    $IANACharset->{$_} = $info for keys %{ $info->{iana_names} };
}
92    
## EUC-JP.  One information hash is shared by the $Charset entry and by
## every $IANACharset entry for this charset.
{
    my $info = {
        iana_names => {
            'extended_unix_code_packed_format_for_japanese'
                => PRIMARY_CHARSET_NAME,
            'cseucpkdfmtjapanese' => REGISTERED_CHARSET_NAME,
            'euc-jp'              => PREFERRED_CHARSET_NAME,
        },
        is_html_ascii_superset => 1,
    };

    $Charset->{'euc-jp'} = $info;

    ## Every name listed in |iana_names| is also a lookup key.
    $IANACharset->{$_} = $info for keys %{ $info->{iana_names} };
}
105    
106     ## TODO: ...
107    
## ISO-2022-JP.  One information hash is shared by the $Charset entry
## and by every $IANACharset entry for this charset.  No
## |is_html_ascii_superset| flag is set for it.
{
    my $info = {
        iana_names => {
            'iso-2022-jp' => PREFERRED_CHARSET_NAME | PRIMARY_CHARSET_NAME,
            'csiso2022jp' => REGISTERED_CHARSET_NAME,
        },
    };

    $Charset->{'iso-2022-jp'} = $info;

    ## Every name listed in |iana_names| is also a lookup key.
    $IANACharset->{$_} = $info for keys %{ $info->{iana_names} };
}
117    
118     ## TODO: ...
119    
## UTF-8.  The same information hash is stored in both tables.
{
    my $info = {
        iana_names => {
            'utf-8' => PRIMARY_CHARSET_NAME,
        },
        is_html_ascii_superset => 1,
    };

    $Charset->{'utf-8'}     = $info;
    $IANACharset->{'utf-8'} = $info;
}
128    
129     ## TODO: ...
130    
## UTF-16BE.  The same information hash is stored in both tables.
{
    my $info = {
        iana_names => {
            'utf-16be' => PRIMARY_CHARSET_NAME,
        },
    };

    $Charset->{'utf-16be'}     = $info;
    $IANACharset->{'utf-16be'} = $info;
}
138    
## UTF-16LE.  The same information hash is stored in both tables.
{
    my $info = {
        iana_names => {
            'utf-16le' => PRIMARY_CHARSET_NAME,
        },
    };

    $Charset->{'utf-16le'}     = $info;
    $IANACharset->{'utf-16le'} = $info;
}
146    
## UTF-16.  The same information hash is stored in both tables.
{
    my $info = {
        iana_names => {
            'utf-16' => PRIMARY_CHARSET_NAME,
        },
    };

    $Charset->{'utf-16'}     = $info;
    $IANACharset->{'utf-16'} = $info;
}
154    
155     ## TODO: ...
156    
## Windows-1252.  The same information hash is stored in both tables.
{
    my $info = {
        iana_names => {
            'windows-1252' => PRIMARY_CHARSET_NAME,
        },
        is_html_ascii_superset => 1,
    };

    $Charset->{'windows-1252'}     = $info;
    $IANACharset->{'windows-1252'} = $info;
}
165    
166     ## TODO: ...
167    
## Tests whether a string has the syntactic shape accepted here for an
## IANA charset name: 1 to 40 characters, each in the range
## 0x20 - 0x7E.  The charset tables are not consulted.
##
## Parameter: the candidate name.  An undefined value is not a name.
## Returns: true (1) if the string matches, false (the empty string)
## otherwise - always exactly one scalar, in scalar and list context.
sub is_syntactically_iana_charset_name ($) {
    my $name = shift;
    ## In list context a failed match yields the empty list, which makes
    ## the call vanish from an argument list or hash constructor and
    ## shifts the elements after it.  |scalar| pins the result to one
    ## value.
    return scalar (defined $name && $name =~ /\A[\x20-\x7E]{1,40}\z/);
} # is_syntactically_iana_charset_name
172    
173     1;
174     ## $Date:$
175    

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24