618 |
sub parse_char_string ($$$;$$) { |
sub parse_char_string ($$$;$$) { |
619 |
#my ($self, $s, $doc, $onerror, $get_wrapper) = @_; |
#my ($self, $s, $doc, $onerror, $get_wrapper) = @_; |
620 |
my $self = shift; |
my $self = shift; |
|
require utf8; |
|
621 |
my $s = ref $_[0] ? $_[0] : \($_[0]); |
my $s = ref $_[0] ? $_[0] : \($_[0]); |
622 |
open my $input, '<' . (utf8::is_utf8 ($$s) ? ':utf8' : ''), $s; |
require Whatpm::Charset::DecodeHandle; |
623 |
|
my $input = Whatpm::Charset::DecodeHandle::CharString->new ($s); |
624 |
if ($_[3]) { |
if ($_[3]) { |
625 |
$input = $_[3]->($input); |
$input = $_[3]->($input); |
626 |
} |
} |
689 |
(0x007F <= $self->{next_char} and $self->{next_char} <= 0x009F) or |
(0x007F <= $self->{next_char} and $self->{next_char} <= 0x009F) or |
690 |
(0xD800 <= $self->{next_char} and $self->{next_char} <= 0xDFFF) or |
(0xD800 <= $self->{next_char} and $self->{next_char} <= 0xDFFF) or |
691 |
(0xFDD0 <= $self->{next_char} and $self->{next_char} <= 0xFDDF) or |
(0xFDD0 <= $self->{next_char} and $self->{next_char} <= 0xFDDF) or |
692 |
|
## ISSUE: U+FDE0-U+FDEF are not excluded |
693 |
{ |
{ |
694 |
0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1, |
0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1, |
695 |
0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1, |
0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1, |
714 |
$self->{prev_char} = [-1, -1, -1]; |
$self->{prev_char} = [-1, -1, -1]; |
715 |
$self->{next_char} = -1; |
$self->{next_char} = -1; |
716 |
|
|
717 |
|
$self->{getc_until} = sub { return undef }; |
718 |
|
# if ($input->can ('manakai_getc_until')) { |
719 |
|
$self->{getc_until} = sub { |
720 |
|
my $special_range = shift; |
721 |
|
return undef if defined $self->{next_next_char}; |
722 |
|
my $s = $input->manakai_getc_until |
723 |
|
(qr/(?![$special_range\x{FDD0}-\x{FDDF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}])[\x20-\x7E\xA0-\x{D7FF}\x{E000}-\x{10FFFD}]/); |
724 |
|
if ($s) { |
725 |
|
$self->{column} += length $$s; |
726 |
|
$self->{column_prev} += length $$s; |
727 |
|
$self->{prev_char} = [-1, -1, -1]; |
728 |
|
$self->{next_char} = -1; |
729 |
|
} |
730 |
|
return $s; |
731 |
|
}; # $self->{getc_until} |
732 |
|
# } else { |
733 |
|
# $self->{getc_until} = sub { |
734 |
|
# my $special_range = shift; |
735 |
|
# return undef if defined $self->{next_next_char}; |
736 |
|
# my $c = $input->getc; |
737 |
|
# if ($c =~ /^(?![$special_range\x{FDD0}-\x{FDDF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}])[\x20-\x7E\xA0-\x{D7FF}\x{E000}-\x{10FFFD}]/) { |
738 |
|
# $self->{column}++; |
739 |
|
# $self->{column_prev}++; |
740 |
|
# $self->{prev_char} = [-1, -1, -1]; |
741 |
|
# $self->{next_char} = -1; |
742 |
|
# return \$c; |
743 |
|
# } elsif (defined $c) { |
744 |
|
# #$input->ungetc (ord $c); |
745 |
|
# $self->{next_next_char} = $c; |
746 |
|
# return undef; |
747 |
|
# } else { |
748 |
|
# return undef; |
749 |
|
# } |
750 |
|
# }; # $self->{getc_until} |
751 |
|
# } |
752 |
|
|
753 |
my $onerror = $_[2] || sub { |
my $onerror = $_[2] || sub { |
754 |
my (%opt) = @_; |
my (%opt) = @_; |
755 |
my $line = $opt{token} ? $opt{token}->{line} : $opt{line}; |
my $line = $opt{token} ? $opt{token}->{line} : $opt{line}; |
1046 |
data => chr $self->{next_char}, |
data => chr $self->{next_char}, |
1047 |
line => $self->{line}, column => $self->{column}, |
line => $self->{line}, column => $self->{column}, |
1048 |
}; |
}; |
1049 |
|
|
1050 |
|
my $s = $self->{getc_until}->(q[-!<>&]); |
1051 |
|
if ($s) { |
1052 |
|
$token->{data} .= $$s; |
1053 |
|
} |
1054 |
|
|
1055 |
## Stay in the data state |
## Stay in the data state |
1056 |
!!!next-input-character; |
!!!next-input-character; |
1057 |
|
|
7817 |
}; |
}; |
7818 |
$p->{prev_char} = [-1, -1, -1]; |
$p->{prev_char} = [-1, -1, -1]; |
7819 |
$p->{next_char} = -1; |
$p->{next_char} = -1; |
7820 |
|
|
7821 |
|
$p->{getc_until} = sub { |
7822 |
|
## TODO: ... |
7823 |
|
return undef; |
7824 |
|
}; # $p->{getc_until}; |
7825 |
|
|
7826 |
my $ponerror = $onerror || sub { |
my $ponerror = $onerror || sub { |
7827 |
my (%opt) = @_; |
my (%opt) = @_; |
7828 |
my $line = $opt{line}; |
my $line = $opt{line}; |