/[suikacvs]/markup/html/whatpm/Whatpm/Charset/WebLatin1.pm
Suika

Contents of /markup/html/whatpm/Whatpm/Charset/WebLatin1.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Wed Sep 10 10:27:09 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +82 -1 lines
++ whatpm/Whatpm/ChangeLog	10 Sep 2008 10:25:19 -0000
2008-09-10  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm: Support for charset-layer error levels.

	* HTML.pm.src: Don't specify |text| argument for the
	|chardecode:fallback| error, since it is not the encoding
	being used alternatively.

++ whatpm/Whatpm/Charset/ChangeLog	10 Sep 2008 10:26:52 -0000
2008-09-10  Wakaba  <wakaba@suika.fam.cx>

	* DecodeHandle.pm: Set error levels.

	* WebLatin1.pm: Support for |us-ascii| and |iso-8859-5|
	charsets (this module no longer for Latin1, but for Latin*
	encodings).

	* WebThai.pm: Support for |tis-620| charset.

1 wakaba 1.1 #!/usr/bin/perl
2     package Whatpm::Charset::WebLatin1;
3     use strict;
4     our $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
5    
6     ## NOTE: This module does not expect that its standalone uses.
7     ## See Message::Charset::Info for how it is used.
8    
9     require Encode::Encoding;
10     push our @ISA, 'Encode::Encoding';
11     __PACKAGE__->Define (qw/web-latin1/);
12    
13     sub encode ($$;$) {
14     # $self, $str, $chk
15     if ($_[2]) {
16     if ($_[1] =~ s/^([\x00-\x7F\xA0-\xFF]+)//) {
17     return Encode::encode ('iso-8859-1', $1);
18     } else {
19     return '';
20     }
21     } else {
22     my $r = $_[1];
23     $r =~ s/[^\x00-\x7F\xA0-\xFF]/?/g;
24     return Encode::encode ('iso-8859-1', $r);
25     }
26     } # encode
27    
28     sub decode ($$;$) {
29     # $self, $s, $chk
30     if ($_[2]) {
31     my $r = '';
32     while (1) {
33     if ($_[1] =~ s/^([\x00-\x7F\xA0-\xFF]+)//) {
34     $r .= $1;
35     #} elsif ($_[1] =~ s/^([\x80\x82-\x8C\x8E\x91-\x9C\x9E\x9F])//) {
36     # my $v = $1;
37     # $v =~ tr/\x80-\x9F/\x{20AC}\x{FFFD}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{FFFD}\x{017D}\x{FFFD}\x{FFFD}\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{FFFD}\x{017E}\x{0178}/;
38     # $r .= $v;
39     } else {
40     return $r;
41     }
42     }
43     } else {
44     my $r = $_[1];
45     $r =~ tr/\x80-\x9F/\x{20AC}\x{FFFD}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{FFFD}\x{017D}\x{FFFD}\x{FFFD}\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{FFFD}\x{017E}\x{0178}/;
46     return $r;
47     }
48     } # decode
49    
50 wakaba 1.2 package Whatpm::Charset::WebLatin1::USASCII;
51     push our @ISA, 'Encode::Encoding';
52     __PACKAGE__->Define (qw/web-latin1-us-ascii/);
53    
54     sub encode ($$;$) {
55     # $self, $str, $chk
56     if ($_[2]) {
57     if ($_[1] =~ s/^([\x00-\x7F]+)//) {
58     return Encode::encode ('iso-8859-1', $1);
59     } else {
60     return '';
61     }
62     } else {
63     my $r = $_[1];
64     $r =~ s/[^\x00-\x7F]/?/g;
65     return Encode::encode ('iso-8859-1', $r);
66     }
67     } # encode
68    
69     sub decode ($$;$) {
70     # $self, $s, $chk
71     if ($_[2]) {
72     my $r = '';
73     while (1) {
74     if ($_[1] =~ s/^([\x00-\x7F]+)//) {
75     $r .= $1;
76     #} elsif ($_[1] =~ s/^([\x80\x82-\x8C\x8E\x91-\x9C\x9E\x9F-\xFF])//) {
77     # my $v = $1;
78     # $v =~ tr/\x80-\xFF/\x{20AC}\x{FFFD}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{FFFD}\x{017D}\x{FFFD}\x{FFFD}\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{FFFD}\x{017E}\x{0178}\xA0-\xFF/;
79     # $r .= $v;
80     } else {
81     return $r;
82     }
83     }
84     } else {
85     my $r = $_[1];
86     $r =~ tr/\x80-\xFF/\x{20AC}\x{FFFD}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{FFFD}\x{017D}\x{FFFD}\x{FFFD}\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{FFFD}\x{017E}\x{0178}\xA0-\xFF/;
87     return $r;
88     }
89     } # decode
90    
91     package Whatpm::Charset::WebLatin1::WebLatin5;
92     push our @ISA, 'Encode::Encoding';
93     __PACKAGE__->Define (qw/web-latin5/);
94    
95     sub encode ($$;$) {
96     # $self, $str, $chk
97     if ($_[2]) {
98     if ($_[1] =~ s/^([\x00-\x7F]+)//) {
99     return Encode::encode ('iso-8859-9', $1);
100     } else {
101     return '';
102     }
103     } else {
104     my $r = $_[1];
105     $r =~ s/[^\x00-\x7F]/?/g;
106     return Encode::encode ('iso-8859-9', $r);
107     }
108     } # encode
109    
110     sub decode ($$;$) {
111     # $self, $s, $chk
112     if ($_[2]) {
113     my $r = '';
114     while (1) {
115     if ($_[1] =~ s/^([\x00-\x7F\xA0-\xFF]+)//) {
116     $r .= Encode::decode ('windows-1254', $1);
117     #} elsif ($_[1] =~ s/^([\x80\x82-\x8C\x91-\x9C\x9F])//) {
118     # my $v = $1;
119     # $v =~ tr/\x80-\x9F/\x{20AC}\x{FFFD}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{FFFD}\x{FFFD}\x{0178}/;
120     # $r .= $v;
121     } else {
122     return $r;
123     }
124     }
125     } else {
126     my $r = Encode::decode ('windows-1254', $_[1]);
127     return $r;
128     }
129     } # decode
130    
131 wakaba 1.1 1;
132 wakaba 1.2 ## $Date: 2008/05/18 06:07:22 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24