528 |
## such as |parse_byte_string| in this module, must ensure that it does |
## such as |parse_byte_string| in this module, must ensure that it does |
529 |
## strip the BOM and never strip any ZWNBSP. |
## strip the BOM and never strip any ZWNBSP. |
530 |
|
|
531 |
*parse_char_string = \&parse_string; |
sub parse_char_string ($$$;$) { |
532 |
|
my $self = shift; |
533 |
|
open my $input, '<:utf8', ref $_[0] ? $_[0] : \($_[0]); |
534 |
|
return $self->parse_char_stream ($input, @_[1..$#_]); |
535 |
|
} # parse_char_string |
536 |
|
*parse_string = \&parse_char_string; |
537 |
|
|
538 |
sub parse_string ($$$;$) { |
sub parse_char_stream ($$$;$) { |
539 |
my $self = ref $_[0] ? shift : shift->new; |
my $self = ref $_[0] ? shift : shift->new; |
540 |
my $s = ref $_[0] ? $_[0] : \($_[0]); |
my $input = $_[0]; |
541 |
$self->{document} = $_[1]; |
$self->{document} = $_[1]; |
542 |
@{$self->{document}->child_nodes} = (); |
@{$self->{document}->child_nodes} = (); |
543 |
|
|
556 |
pop @{$self->{prev_char}}; |
pop @{$self->{prev_char}}; |
557 |
unshift @{$self->{prev_char}}, $self->{next_char}; |
unshift @{$self->{prev_char}}, $self->{next_char}; |
558 |
|
|
559 |
$self->{next_char} = -1 and return if $i >= length $$s; |
my $char = $input->getc; |
560 |
$self->{next_char} = ord substr $$s, $i++, 1; |
$self->{next_char} = -1 and return unless defined $char; |
561 |
|
$self->{next_char} = ord $char; |
562 |
|
|
563 |
($self->{line_prev}, $self->{column_prev}) |
($self->{line_prev}, $self->{column_prev}) |
564 |
= ($self->{line}, $self->{column}); |
= ($self->{line}, $self->{column}); |
570 |
$self->{column} = 0; |
$self->{column} = 0; |
571 |
} elsif ($self->{next_char} == 0x000D) { # CR |
} elsif ($self->{next_char} == 0x000D) { # CR |
572 |
!!!cp ('j2'); |
!!!cp ('j2'); |
573 |
$i++ if substr ($$s, $i, 1) eq "\x0A"; |
my $next = $input->getc; |
574 |
|
if ($next ne "\x0A") { |
575 |
|
$input->ungetc ($next); |
576 |
|
} |
577 |
$self->{next_char} = 0x000A; # LF # MUST |
$self->{next_char} = 0x000A; # LF # MUST |
578 |
$self->{line}++; |
$self->{line}++; |
579 |
$self->{column} = 0; |
$self->{column} = 0; |
626 |
delete $self->{parse_error}; # remove loop |
delete $self->{parse_error}; # remove loop |
627 |
|
|
628 |
return $self->{document}; |
return $self->{document}; |
629 |
} # parse_string |
} # parse_char_stream |
630 |
|
|
631 |
sub new ($) { |
sub new ($) { |
632 |
my $class = shift; |
my $class = shift; |