/[suikacvs]/messaging/manakai/lib/Message/MIME/Charset.pm
Suika

Diff of /messaging/manakai/lib/Message/MIME/Charset.pm

Parent Directory | Revision Log | View Patch

revision 1.16 by wakaba, Sat Jul 27 00:39:54 2002 UTC | revision 1.17 by wakaba, Sun Aug 18 06:22:36 2002 UTC
# Line 101  my %_MINIMUMIZER = ( Line 101  my %_MINIMUMIZER = (
101          'iso-2022-jp-3-plane1'  => \&_name_8bit_iso2022,          'iso-2022-jp-3-plane1'  => \&_name_8bit_iso2022,
102          'iso-2022-kr'   => \&_name_8bit_iso2022,          'iso-2022-kr'   => \&_name_8bit_iso2022,
103          'iso-8859-1'    => \&_name_8bit_iso2022,          'iso-8859-1'    => \&_name_8bit_iso2022,
         'iso-10646-j-1' => \&_name_utf16be,  
         'iso-10646-ucs-2'       => \&_name_utf16be,  
         'iso-10646-ucs-4'       => \&_name_utf32be,  
         'iso-10646-ucs-basic'   => \&_name_utf16be,  
         'iso-10646-unicode-latin1'      => \&_name_utf16be,  
104          jis_x0201       => \&_name_shift_jis,          jis_x0201       => \&_name_shift_jis,
105          junet   => \&_name_8bit_iso2022,          junet   => \&_name_8bit_iso2022,
106          'x-junet8'      => \&_name_net_ascii_8bit,          'x-junet8'      => \&_name_net_ascii_8bit,
# Line 115  my %_MINIMUMIZER = ( Line 110  my %_MINIMUMIZER = (
110          'x-sjis'        => \&_name_shift_jis,          'x-sjis'        => \&_name_shift_jis,
111          'us-ascii'      => \&_name_net_ascii_8bit,          'us-ascii'      => \&_name_net_ascii_8bit,
112          'utf-8' => \&_name_net_ascii_8bit,          'utf-8' => \&_name_net_ascii_8bit,
         'utf-16be'      => \&_name_utf16be,  
         'utf-32be'      => \&_name_utf32be,  
113  );  );
114    
115  my %_IsMimeText;  my %_IsMimeText;
# Line 227  sub name_normalize ($) { Line 220  sub name_normalize ($) {
220  }  }
221    
222  sub name_minimumize ($$) {  sub name_minimumize ($$) {
223      require Message::MIME::Charset::MinName;
224    my ($charset, $s) = (lc shift, shift);    my ($charset, $s) = (lc shift, shift);
225    if (ref $CHARSET{$charset}->{name_minimumizer} eq 'CODE') {    if (ref $CHARSET{$charset}->{name_minimumizer} eq 'CODE') {
226      return &{$CHARSET{$charset}->{name_minimumizer}} ($charset, $s);      return &{$CHARSET{$charset}->{name_minimumizer}} ($charset, $s);
227      } elsif (ref $Message::MIME::Charset::MinName::MIN{$charset}) {
228        return &{$Message::MIME::Charset::MinName::MIN{$charset}} ($charset, $s);
229    } elsif (ref $_MINIMUMIZER{$charset}) {    } elsif (ref $_MINIMUMIZER{$charset}) {
230      return &{$_MINIMUMIZER{$charset}} ($charset, $s);      return &{$_MINIMUMIZER{$charset}} ($charset, $s);
231    } elsif (ref $CHARSET{'*undef'}->{name_minimumizer} eq 'CODE') {    } elsif (ref $CHARSET{'*undef'}->{name_minimumizer} eq 'CODE') {
# Line 467  sub _name_shift_jis ($$) { Line 463  sub _name_shift_jis ($$) {
463    }    }
464  }  }
465    
 sub _name_utf16be ($$) {  
   shift; my $s = shift;  
   if ($s =~ /[\xD8-\xDB][\x00-\xFF][\xDC-\xDF][\x00-\xFF]  
              (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx) {  
     (charset => 'utf-16be');  
   } elsif ($s =~ /[\x01-\xFF][\x00-\xFF]  
              (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx) {  
     if ($s =~ /([^\x00\x03\x04\x23\x25\x30\xFE\xFF]  
                      [\x00-\xFF]        # ^\x20\x22\x4E-\x9F\xF9\xFA  
                   |\x03[^\x00-\x6F\xD0-\xFF]  
                   #|\x20[^\x00-\x6F]  
                   |\x25[^\x00-\x7F]  
                   |\xFE[^\x30-\x4F]  
                   |\xFF[^\x00-\xEF]  
                   ## note 1 of RFC 1816 is ambitious, so block entire  
                   ## is excepted  
                     |\x30[\x00-\x3F]  
                   )  
              (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx) {  
       (charset => 'iso-10646-ucs-2');  
     } else {  
       (charset => 'iso-10646-j-1');  
     }  
   } elsif ($s =~ /\x00[\x80-\xFF]  
              (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx) {  
     (charset => 'iso-10646-unicode-latin1');  
   } else {  
     (charset => 'iso-10646-ucs-basic');  
   }  
 }  
   
 sub _name_utf32be ($$) {  
   shift; my $s = shift;  
   if ($s =~ /  
     ([\x01-\x7F][\x00-\xFF]{3}  
     |\x00[\x11-\xFF][\x00-\xFF][\x00-\xFF])  
              (?=(?:[\x00-\xFF]{4})*\z)/sx) {  
     (charset => 'iso-10646-ucs-4');  
   } else {  
     (charset => 'utf-32be');  
   }  
 }  
   
466  sub _utf8_on ($) {  sub _utf8_on ($) {
467    Encode::_utf8_on ($_[0]) if $Encode::VERSION;    Encode::_utf8_on ($_[0]) if $Encode::VERSION;
468  }  }

Legend:
Removed from v.1.16  
changed lines
  Added in v.1.17

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24