/[suikacvs]/markup/html/whatpm/Whatpm/Charset/WebLatin1.pm
Suika

Contents of /markup/html/whatpm/Whatpm/Charset/WebLatin1.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations) (download)
Wed Sep 10 10:27:09 2008 UTC (16 years, 10 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +82 -1 lines
++ whatpm/Whatpm/ChangeLog	10 Sep 2008 10:25:19 -0000
2008-09-10  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.pm: Support for charset-layer error levels.

	* HTML.pm.src: Don't specify |text| argument for the
	|chardecode:fallback| error, since it is not the encoding
	being used alternatively.

++ whatpm/Whatpm/Charset/ChangeLog	10 Sep 2008 10:26:52 -0000
2008-09-10  Wakaba  <wakaba@suika.fam.cx>

	* DecodeHandle.pm: Set error levels.

	* WebLatin1.pm: Support for |us-ascii| and |iso-8859-5|
	charsets (this module no longer for Latin1, but for Latin*
	encodings).

	* WebThai.pm: Support for |tis-620| charset.

1 #!/usr/bin/perl
2 package Whatpm::Charset::WebLatin1;
3 use strict;
4 our $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
5
6 ## NOTE: This module does not expect that its standalone uses.
7 ## See Message::Charset::Info for how it is used.
8
9 require Encode::Encoding;
10 push our @ISA, 'Encode::Encoding';
11 __PACKAGE__->Define (qw/web-latin1/);
12
13 sub encode ($$;$) {
14 # $self, $str, $chk
15 if ($_[2]) {
16 if ($_[1] =~ s/^([\x00-\x7F\xA0-\xFF]+)//) {
17 return Encode::encode ('iso-8859-1', $1);
18 } else {
19 return '';
20 }
21 } else {
22 my $r = $_[1];
23 $r =~ s/[^\x00-\x7F\xA0-\xFF]/?/g;
24 return Encode::encode ('iso-8859-1', $r);
25 }
26 } # encode
27
28 sub decode ($$;$) {
29 # $self, $s, $chk
30 if ($_[2]) {
31 my $r = '';
32 while (1) {
33 if ($_[1] =~ s/^([\x00-\x7F\xA0-\xFF]+)//) {
34 $r .= $1;
35 #} elsif ($_[1] =~ s/^([\x80\x82-\x8C\x8E\x91-\x9C\x9E\x9F])//) {
36 # my $v = $1;
37 # $v =~ tr/\x80-\x9F/\x{20AC}\x{FFFD}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{FFFD}\x{017D}\x{FFFD}\x{FFFD}\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{FFFD}\x{017E}\x{0178}/;
38 # $r .= $v;
39 } else {
40 return $r;
41 }
42 }
43 } else {
44 my $r = $_[1];
45 $r =~ tr/\x80-\x9F/\x{20AC}\x{FFFD}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{FFFD}\x{017D}\x{FFFD}\x{FFFD}\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{FFFD}\x{017E}\x{0178}/;
46 return $r;
47 }
48 } # decode
49
50 package Whatpm::Charset::WebLatin1::USASCII;
51 push our @ISA, 'Encode::Encoding';
52 __PACKAGE__->Define (qw/web-latin1-us-ascii/);
53
54 sub encode ($$;$) {
55 # $self, $str, $chk
56 if ($_[2]) {
57 if ($_[1] =~ s/^([\x00-\x7F]+)//) {
58 return Encode::encode ('iso-8859-1', $1);
59 } else {
60 return '';
61 }
62 } else {
63 my $r = $_[1];
64 $r =~ s/[^\x00-\x7F]/?/g;
65 return Encode::encode ('iso-8859-1', $r);
66 }
67 } # encode
68
69 sub decode ($$;$) {
70 # $self, $s, $chk
71 if ($_[2]) {
72 my $r = '';
73 while (1) {
74 if ($_[1] =~ s/^([\x00-\x7F]+)//) {
75 $r .= $1;
76 #} elsif ($_[1] =~ s/^([\x80\x82-\x8C\x8E\x91-\x9C\x9E\x9F-\xFF])//) {
77 # my $v = $1;
78 # $v =~ tr/\x80-\xFF/\x{20AC}\x{FFFD}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{FFFD}\x{017D}\x{FFFD}\x{FFFD}\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{FFFD}\x{017E}\x{0178}\xA0-\xFF/;
79 # $r .= $v;
80 } else {
81 return $r;
82 }
83 }
84 } else {
85 my $r = $_[1];
86 $r =~ tr/\x80-\xFF/\x{20AC}\x{FFFD}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{FFFD}\x{017D}\x{FFFD}\x{FFFD}\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{FFFD}\x{017E}\x{0178}\xA0-\xFF/;
87 return $r;
88 }
89 } # decode
90
91 package Whatpm::Charset::WebLatin1::WebLatin5;
92 push our @ISA, 'Encode::Encoding';
93 __PACKAGE__->Define (qw/web-latin5/);
94
95 sub encode ($$;$) {
96 # $self, $str, $chk
97 if ($_[2]) {
98 if ($_[1] =~ s/^([\x00-\x7F]+)//) {
99 return Encode::encode ('iso-8859-9', $1);
100 } else {
101 return '';
102 }
103 } else {
104 my $r = $_[1];
105 $r =~ s/[^\x00-\x7F]/?/g;
106 return Encode::encode ('iso-8859-9', $r);
107 }
108 } # encode
109
110 sub decode ($$;$) {
111 # $self, $s, $chk
112 if ($_[2]) {
113 my $r = '';
114 while (1) {
115 if ($_[1] =~ s/^([\x00-\x7F\xA0-\xFF]+)//) {
116 $r .= Encode::decode ('windows-1254', $1);
117 #} elsif ($_[1] =~ s/^([\x80\x82-\x8C\x91-\x9C\x9F])//) {
118 # my $v = $1;
119 # $v =~ tr/\x80-\x9F/\x{20AC}\x{FFFD}\x{201A}\x{0192}\x{201E}\x{2026}\x{2020}\x{2021}\x{02C6}\x{2030}\x{0160}\x{2039}\x{0152}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{2018}\x{2019}\x{201C}\x{201D}\x{2022}\x{2013}\x{2014}\x{02DC}\x{2122}\x{0161}\x{203A}\x{0153}\x{FFFD}\x{FFFD}\x{0178}/;
120 # $r .= $v;
121 } else {
122 return $r;
123 }
124 }
125 } else {
126 my $r = Encode::decode ('windows-1254', $_[1]);
127 return $r;
128 }
129 } # decode
130
131 1;
132 ## $Date: 2008/05/18 06:07:22 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24