342 |
$C ||= &new_object; |
$C ||= &new_object; |
343 |
|
|
344 |
my $r = ''; |
my $r = ''; |
345 |
for my $c (split //, $s) { |
my @c = split //, $s; |
346 |
my $cc = ord $c; Encode::_utf8_off ($c); |
for my $i (0..$#c) { |
347 |
|
my $c = $c[$i]; my $cc = ord $c; Encode::_utf8_off ($c); |
348 |
my $t; |
my $t; |
349 |
if ($cc <= 0x1F) { |
if ($cc <= 0x1F) { |
350 |
$t = _i2c ($c, $C, type => 'C0', charset => '@'); |
$t = _i2c ($c, $C, type => 'C0', charset => '@'); |
453 |
->[ ($cc / 0x10000) - 0x7042 ]->[ $c / 8836 ]); |
->[ ($cc / 0x10000) - 0x7042 ]->[ $c / 8836 ]); |
454 |
} |
} |
455 |
if (defined $t) { |
if (defined $t) { |
456 |
|
## Back to ISO/IEC 2022 if necessary |
457 |
$t = _i2o ($t, $C, cs_F => "\x40") |
$t = _i2o ($t, $C, cs_F => "\x40") |
458 |
if $C->{coding_system} ne $CODING_SYSTEM{"\x40"}; |
if $C->{coding_system} ne $CODING_SYSTEM{"\x40"}; |
459 |
} else { |
} else { |
460 |
|
## Output in UCS-n or UTF-n if character can't be represented in ISO/IEC 2022 |
461 |
my $F; my @F = qw~G /G /H /I B /A /D /F~; |
my $F; my @F = qw~G /G /H /I B /A /D /F~; |
462 |
push @F, qw~/J /K /L~ if $cc <= 0x10FFFF; |
push @F, qw~/J /K /L~ if $cc <= 0x10FFFF; |
463 |
push @F, qw~/@ /C /E~ if $cc <= 0xFFFF; |
push @F, qw~/@ /C /E~ if $cc <= 0xFFFF; |
471 |
} |
} |
472 |
$t = _i2o ($c, $C, cs_F => $F) if $F; |
$t = _i2o ($c, $C, cs_F => $F) if $F; |
473 |
} |
} |
474 |
if (defined $t) { |
if (defined $t) { ## Output character itself |
475 |
$r .= $t; |
$r .= $t; |
476 |
|
} elsif ($C->{option}->{fallback_from_ucs} =~ /quiet/) { |
477 |
|
$r .= _back2ascii ($C) if $C->{option}->{fallback_from_ucs} =~ /back/; |
478 |
|
return ($r, halfway => 1, converted_length => $i, |
479 |
|
warn => $C->{option}->{fallback_from_ucs} =~ /warn/ ? 1 : 0, |
480 |
|
reason => sprintf (q(U+%04X: There is no character mapped to), $cc)); |
481 |
|
} elsif ($C->{option}->{fallback_from_ucs} eq 'croak') { |
482 |
|
return ($r, halfway => 1, die => 1, |
483 |
|
reason => sprintf (q(U+%04X: There is no character mapped to), $cc)); |
484 |
} else { |
} else { |
485 |
unless ($C->{option}->{undef_char}->[0] eq "\x20") { |
## Try to output with fallback escape sequence (if specified) |
486 |
$t = _i2g ($C->{option}->{undef_char}->[0], $C, |
my $t = Encode::Charset::fallback_escape ($C, $c); |
487 |
%{ $C->{option}->{undef_char}->[1] }); |
if (defined $t) { |
488 |
} else { ## SP |
my %D = (fallback => $C->{option}->{fallback_from_ucs}, reset => $C->{option}->{reset}); |
489 |
$t = _back2ascii ($C) . "\x20"; |
$C->{option}->{fallback_from_ucs} = 'croak'; |
490 |
|
$C->{option}->{reset} = {Gdesignation => 0, Ginvoke => 0}; |
491 |
|
eval q{$t = $C->{_encoder}->_encode_internal ($t, $C)} or undef $t; |
492 |
|
$C->{option}->{fallback_from_ucs} = $D{fallback}; |
493 |
|
$C->{option}->{reset} = $D{reset}; |
494 |
|
} |
495 |
|
if (defined $t) { |
496 |
|
$r .= $t; |
497 |
|
} else { ## Replacement character specified in charset definition |
498 |
|
unless ($C->{option}->{undef_char}->[0] eq "\x20") { ## A graphic character |
499 |
|
$t = _i2g ($C->{option}->{undef_char}->[0], $C, |
500 |
|
%{ $C->{option}->{undef_char}->[1] }); |
501 |
|
} else { ## SPACE |
502 |
|
$t = _back2ascii ($C) . "\x20"; |
503 |
|
} |
504 |
|
$r .= $C->{coding_system} eq $CODING_SYSTEM{"\x40"} ? |
505 |
|
$t : _i2o ($t, $C, cs_F => "\x40"); |
506 |
} |
} |
|
$r .= $C->{coding_system} eq $CODING_SYSTEM{"\x40"} ? |
|
|
$t : _i2o ($t, $C, cs_F => "\x40"); |
|
507 |
} |
} |
508 |
} |
} |
509 |
$r . _back2ascii ($C); |
($r . _back2ascii ($C)); ## Back to ASCII at the end of document if specified |
510 |
} |
} |
511 |
|
|
512 |
## $O{charset} eq undef means that charset is same as the current designated one. |
## $O{charset} eq undef means that charset is same as the current designated one. |
714 |
$r . $s; |
$r . $s; |
715 |
} |
} |
716 |
|
|
|
1; |
|
|
__END__ |
|
|
|
|
717 |
=head1 SEE ALSO |
=head1 SEE ALSO |
718 |
|
|
719 |
ISO/IEC 646:1991, "7-bit coded graphic character set for intormation interchange". |
ISO/IEC 646:1991, "7-bit coded graphic character set for intormation interchange". |
729 |
|
|
730 |
ISO/IEC 8859, "8-Bit Single-Byte Coded Graphic Character Sets". |
ISO/IEC 8859, "8-Bit Single-Byte Coded Graphic Character Sets". |
731 |
|
|
732 |
Encode, perlunicode |
L<Encode>, perlunicode |
733 |
|
|
734 |
=head1 TODO |
=head1 TODO |
735 |
|
|
782 |
|
|
783 |
=head1 AUTHORS |
=head1 AUTHORS |
784 |
|
|
785 |
Nanashi-san |
Nanashi-san <nanashi.san@nanashi.invalid> |
786 |
|
|
787 |
Wakaba <w@suika.fam.cx> |
Wakaba <w@suika.fam.cx> |
788 |
|
|
789 |
=head1 LICENSE |
=head1 LICENSE |
790 |
|
|
791 |
Copyright 2002 AUTHORS |
Copyright 2002 AUTHORS, all rights reserved. |
792 |
|
|
793 |
This library is free software; you can redistribute it |
This library is free software; you can redistribute it |
794 |
and/or modify it under the same terms as Perl itself. |
and/or modify it under the same terms as Perl itself. |
795 |
|
|
796 |
=cut |
=cut |
797 |
|
|
798 |
# $Date$ |
1; # $Date$ |