354 |
return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]); |
return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]); |
355 |
} # parse_byte_string |
} # parse_byte_string |
356 |
|
|
357 |
sub parse_byte_stream ($$$$;$) { |
sub parse_byte_stream ($$$$;$$) { |
358 |
|
# my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_; |
359 |
my $self = ref $_[0] ? shift : shift->new; |
my $self = ref $_[0] ? shift : shift->new; |
360 |
my $charset_name = shift; |
my $charset_name = shift; |
361 |
my $byte_stream = $_[0]; |
my $byte_stream = $_[0]; |
366 |
}; |
}; |
367 |
$self->{parse_error} = $onerror; # updated later by parse_char_string |
$self->{parse_error} = $onerror; # updated later by parse_char_string |
368 |
|
|
369 |
|
my $get_wrapper = $_[3] || sub ($) { |
370 |
|
return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle |
371 |
|
}; |
372 |
|
|
373 |
## HTML5 encoding sniffing algorithm |
## HTML5 encoding sniffing algorithm |
374 |
require Message::Charset::Info; |
require Message::Charset::Info; |
375 |
my $charset; |
my $charset; |
567 |
${$opt{octets}} = "\x{FFFD}"; # replacement character |
${$opt{octets}} = "\x{FFFD}"; # replacement character |
568 |
} |
} |
569 |
}; |
}; |
570 |
$char_stream->onerror ($char_onerror); |
|
571 |
|
my $wrapped_char_stream = $get_wrapper->($char_stream); |
572 |
|
$wrapped_char_stream->onerror ($char_onerror); |
573 |
|
|
574 |
my @args = @_; shift @args; # $s |
my @args = @_; shift @args; # $s |
575 |
my $return; |
my $return; |
576 |
try { |
try { |
577 |
$return = $self->parse_char_stream ($char_stream, @args); |
$return = $self->parse_char_stream ($wrapped_char_stream, @args); |
578 |
} catch Whatpm::HTML::RestartParser with { |
} catch Whatpm::HTML::RestartParser with { |
579 |
## NOTE: Invoked after {change_encoding}. |
## NOTE: Invoked after {change_encoding}. |
580 |
|
|
597 |
$self->{input_encoding} = $charset->get_iana_name; |
$self->{input_encoding} = $charset->get_iana_name; |
598 |
} |
} |
599 |
$self->{confident} = 1; |
$self->{confident} = 1; |
600 |
$char_stream->onerror ($char_onerror); |
|
601 |
$return = $self->parse_char_stream ($char_stream, @args); |
$wrapped_char_stream = $get_wrapper->($char_stream); |
602 |
|
$wrapped_char_stream->onerror ($char_onerror); |
603 |
|
|
604 |
|
$return = $self->parse_char_stream ($wrapped_char_stream, @args); |
605 |
}; |
}; |
606 |
return $return; |
return $return; |
607 |
} # parse_byte_stream |
} # parse_byte_stream |
615 |
## such as |parse_byte_string| in this module, must ensure that it does |
## such as |parse_byte_string| in this module, must ensure that it does |
616 |
## strip the BOM and never strip any ZWNBSP. |
## strip the BOM and never strip any ZWNBSP. |
617 |
|
|
618 |
sub parse_char_string ($$$;$) { |
sub parse_char_string ($$$;$$) { |
619 |
|
#my ($self, $s, $doc, $onerror, $get_wrapper) = @_; |
620 |
my $self = shift; |
my $self = shift; |
621 |
require utf8; |
require utf8; |
622 |
my $s = ref $_[0] ? $_[0] : \($_[0]); |
my $s = ref $_[0] ? $_[0] : \($_[0]); |
623 |
open my $input, '<' . (utf8::is_utf8 ($$s) ? ':utf8' : ''), $s; |
open my $input, '<' . (utf8::is_utf8 ($$s) ? ':utf8' : ''), $s; |
624 |
|
if ($_[3]) { |
625 |
|
$input = $_[3]->($input); |
626 |
|
} |
627 |
return $self->parse_char_stream ($input, @_[1..$#_]); |
return $self->parse_char_stream ($input, @_[1..$#_]); |
628 |
} # parse_char_string |
} # parse_char_string |
629 |
*parse_string = \&parse_char_string; |
*parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility. |
630 |
|
|
631 |
sub parse_char_stream ($$$;$) { |
sub parse_char_stream ($$$;$) { |
632 |
my $self = ref $_[0] ? shift : shift->new; |
my $self = ref $_[0] ? shift : shift->new; |
7472 |
## TODO: script stuffs |
## TODO: script stuffs |
7473 |
} # _tree_construct_main |
} # _tree_construct_main |
7474 |
|
|
7475 |
sub set_inner_html ($$$) { |
sub set_inner_html ($$$;$) { |
7476 |
my $class = shift; |
my $class = shift; |
7477 |
my $node = shift; |
my $node = shift; |
7478 |
my $s = \$_[0]; |
my $s = \$_[0]; |
7479 |
my $onerror = $_[1]; |
my $onerror = $_[1]; |
7480 |
|
my $get_wrapper = $_[2] || sub ($) { return $_[0] }; |
7481 |
|
|
7482 |
## ISSUE: Should {confident} be true? |
## ISSUE: Should {confident} be true? |
7483 |
|
|
7496 |
} |
} |
7497 |
|
|
7498 |
## Step 3, 4, 5 # MUST |
## Step 3, 4, 5 # MUST |
7499 |
$class->parse_string ($$s => $node, $onerror); |
$class->parse_char_string ($$s => $node, $onerror, $get_wrapper); |
7500 |
} elsif ($nt == 1) { |
} elsif ($nt == 1) { |
7501 |
## TODO: If non-html element |
## TODO: If non-html element |
7502 |
|
|
7503 |
## NOTE: Most of this code is copied from |parse_string| |
## NOTE: Most of this code is copied from |parse_string| |
7504 |
|
|
7505 |
|
## TODO: Support for $get_wrapper |
7506 |
|
|
7507 |
## Step 1 # MUST |
## Step 1 # MUST |
7508 |
my $this_doc = $node->owner_document; |
my $this_doc = $node->owner_document; |
7509 |
my $doc = $this_doc->implementation->create_document; |
my $doc = $this_doc->implementation->create_document; |