/[suikacvs]/perl/lib/Encode/ISO2022/CP932.pm
Suika

Contents of /perl/lib/Encode/ISO2022/CP932.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations) (download)
Fri Sep 20 14:01:45 2002 UTC (22 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +23 -8 lines
2002-09-20  Wakaba <w@suika.fam.cx>

	* ISO2022.pm:
	- (iso2022_to_internal): New function.
	- (_iso2022_to_internal): Renamed from iso2022_to_internal.
	- (iso2022_to_internal): Experimental support of DOCS.
	- (internal_to_iso2022): Output in UCS coding systems
	if the character is unable to be encoded in ISO/IEC 2022
	coded character sets.
	- (_i2o): New procedure.
	- ($C->{option}->{designate_to}->{coding_system}): New option
	property object.
	- ($C->{coding_system}): New property.
	- (%CODING_SYSTEM): New hash.  (Alias to Encode::Charset's one.)
	* Charset.pm (make_initial_coding_system): Set 'reset_state'
	property with 1 value to coding systems of DOCS with 02/14 I byte.

1 require 5.7.3; # refuse to load on perl older than 5.7.3
2 package Encode::ISO2022::CP932;
3 use strict;
4 use vars qw(%DEFAULT $VERSION); # package globals; %DEFAULT is declared but not used in this file
5 $VERSION=do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; # version built from the digits of the CVS Revision keyword ("1.2" gives "1.02")
6 use base qw(Encode::Encoding); # this class is an Encode::Encoding subclass
7 __PACKAGE__->Define (qw/x-iso2022jp-cp932 CP50220/); # define the encoding under the names x-iso2022jp-cp932 and CP50220
8
# encode ($obj, $str [, $chk])
#
# Encoder entry point of this encoding class.  The real conversion is
# not implemented yet: the work is delegated to Encode's 'iso-2022-jp'
# encoder.
#
#   $obj - the encoding object (not used here)
#   $str - the character string to encode
#   $chk - optional Encode CHECK value.  When true, the caller's
#          string is emptied, and the value is forwarded to the
#          underlying encoder so that a request such as FB_CROAK
#          is honoured instead of being silently ignored.
#
# Returns the encoded octet string.
sub encode ($$;$) {
    my ($obj, $str, $chk) = @_;
    # Mark the caller's buffer as consumed before encoding; $str is
    # our own copy, so the data to encode is not lost.
    $_[1] = '' if $chk;
    ## TODO: implement this!
    return Encode::encode ('iso-2022-jp', $str, $chk || 0); ## temporary
}
16
# decode ($obj, $str [, $chk])
#
# Decoder entry point of this encoding class.  The 7-bit input is
# first rewritten into Shift JIS octets by _jis7_to_sjis, and the
# result is then decoded as 'cp932'.
#
#   $obj - the encoding object (not used here)
#   $str - the octet string to decode
#   $chk - optional Encode CHECK value; when true the caller's
#          string is emptied
#
# Returns the decoded character string.
sub decode ($$;$) {
    my ($self, $octets, $check) = @_;
    if ($check) {
        $_[1] = '';
    }
    # The reference is passed explicitly: _jis7_to_sjis is defined
    # further down, so its (\$) prototype is not in effect for this call.
    _jis7_to_sjis (\$octets);
    return Encode::decode ('cp932', $octets);
}
23
# Lazily filled cache: two-byte JIS code => two-byte Shift JIS code.
my %_L2S;

# _jis7_to_sjis (\$octets)
#
# Rewrites the referenced octet string in place, turning 7-bit
# ISO/IEC 2022 style text into Shift JIS octets:
#
#   * the designation escapes ESC 24 [28] 40/42 and ESC 28 B/H/I/J
#     are removed;
#   * after ESC 28 I, bytes 21-5F become A1-DF (bytes 60-7E become '?');
#   * after a two-byte designation, each byte pair is converted
#     arithmetically to its Shift JIS pair;
#   * after the other one-byte designations the text is left as it is;
#   * SO/SI around runs of bytes A1-DF are removed.
#
# A designation stays in effect across SPACE and TAB and ends at any
# other control character (including ESC, SO, SI, CR and LF).
# Earlier the run ended at the first SPACE/TAB, which left the rest of
# a shifted segment unconverted.
#
# The argument must be a reference to the scalar to rewrite.  The
# return value is that of the final substitution and is not meaningful.
sub _jis7_to_sjis (\$) {
    my $octets_ref = shift;

    $$octets_ref =~ s{
        ( \x1B\x24\x28?[\x40\x42] | \x1B\x28[BHIJ] )
        ( [^\x00-\x08\x0A-\x1F]* )
    }{
        my ($esc, $run) = ($1, $2);
        if ($esc =~ /\x1B\x28I/) {
            # The final '?' of the replacement list is replicated by tr
            # to cover every remaining byte up to 7E.
            $run =~ tr/\x21-\x7E/\xA1-\xDF?/;
        }
        elsif ($esc !~ /\x1B\x28/) {
            # NOTE(review): lead bytes above 7E are accepted as in the
            # original code; this looks deliberate, confirm before narrowing.
            $run =~ s{ ([\x21-\xFF][\x21-\x7E]) }{
                my $pair = $1;
                unless ($_L2S{$pair}) {
                    my ($c1, $c2) = unpack 'CC', $pair;
                    # Odd rows fill the lower half of a Shift JIS row
                    # (skipping 7F), even rows the upper half.
                    $c2 += ($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E;
                    # Two JIS rows share one lead byte; rows from 5F on
                    # move to the E0.. lead byte range.
                    $c1 = (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1);
                    $_L2S{$pair} = pack 'CC', $c1, $c2;
                }
                $_L2S{$pair};
            }gex;
        }
        $run;
    }gex;

    # NOTE(review): only bytes A1-DF between SO and SI are unwrapped;
    # 7-bit bytes between SO and SI are not converted here.  Confirm
    # whether that is intended.
    $$octets_ref =~ s{ \x0E ([\xA1-\xDF]+) \x0F }{$1}gx;
}
52
53 1;
54 __END__
55
56 =head1 NAME
57
58 Encode::ISO2022::CP932 --- Encode module for ISO/IEC 2022 like
59 encoding of Microsoft CP932 (Shift JIS)
60
61 =head1 DESCRIPTION
62
63 Windows Code Page 932, Microsoft version of shift JIS,
64 is widely used in Japanese PC community. It is the combination
65 of JIS character set with non-standard extended characters (CCS)
66 and "shift JIS" encoding scheme (CES).
67
68 Non-PC communities such as un*x users' or Internet mail/news'
69 use standard JIS character set for CCS and EUC or 7bit
70 ISO/IEC 2022 for CES.
71
72 This situation has given rise to ill-conceived converters that
73 output a charset consisting of the non-standard CCS combined
74 with the EUC or 7bit ISO/IEC 2022 CES.
75
76 This module is meant to support two such charsets. At present only
77 one, C<x-iso2022jp-cp932>, is defined; see ENCODINGS below.
78
79 =head1 ENCODINGS
80
81 =over 4
82
83 =item x-iso2022jp-cp932
84
85 Microsoft Windows CodePage 50220: A transformation format of
86 Microsoft Windows CodePage 932 that looks like C<ISO-2022-JP>
87 (Alias: C<CP50220> (M$))
88
89 Note that this coding system does NOT conform to RFC 1468,
90 JIS standards nor ISO/IEC 2022. This coding system
91 SHOULD be used when and ONLY when converting data received
92 from Microsoft Windows platforms.
93
94 =back
95
96 =head1 LICENSE
97
98 Copyright 2002 Wakaba <w@suika.fam.cx>
99
100 This library is free software; you can redistribute it
101 and/or modify it under the same terms as Perl itself.
102
103 =cut
104
105 # $Date: $
106 ### CP932.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24