package WebHACC::Language::HTML;
use strict;
require WebHACC::Language::DOM;
push our @ISA, 'WebHACC::Language::DOM';

## Constructor.  Returns a new object, backed by an empty hash, blessed
## into the class on which the method was invoked.
sub new ($) {
  my $class = shift;
  my $self = {};
  return bless $self, $class;
} # new

## Parses the HTML input and reports every parse error into the
## "parse-errors" section of the output.
##
## The input is parsed either as a complete document or, when the
## input's |inner_html_element| is a non-empty string, as the inner
## HTML of an element with that name.  The resulting node is stored
## in |$self->{structure}| (the element in the inner-HTML case, the
## document otherwise).
sub generate_syntax_error_section ($) {
  my $self = shift;

  ## The parser modules are loaded only when this method is run.
  require Message::DOM::DOMImplementation;
  require Encode;
  require Whatpm::HTML;

  my $out = $self->output;
  $out->start_section (role => 'parse-errors');
  $out->start_error_list (role => 'parse-errors');
  $self->result->layer_applicable ('syntax');

  my $input = $self->input;
  my $result = $self->result;

  ## Error types that, in addition to being reported, make the
  ## listed result layers uncertain.
  my @charset_dependent_layers = qw(charset syntax structure semantics);
  my %uncertain_layers_for = (
    'chardecode:no error' => ['encode'],
    'chardecode:fallback' => [@charset_dependent_layers],
    'charset:not supported' => [@charset_dependent_layers],
  );

  ## Error handler given to the parser: records the error in the
  ## "syntax" layer, then marks the affected layers (if any).
  my $report_error = sub {
    my %error = @_;
    $result->add_error (layer => 'syntax', %error);

    my $layers = $uncertain_layers_for{$error{type}};
    if ($layers) {
      for my $layer (@$layers) {
        $self->result->layer_uncertain ($layer);
      }
    }
  };

  $self->result->layer_applicable ('charset');

  ## Wraps the handle given by the parser in a character checker
  ## created with the 'html5' argument.
  my $char_checker = sub ($) {
    require Whatpm::Charset::UnicodeChecker;
    return Whatpm::Charset::UnicodeChecker->new_handle ($_[0], 'html5');
  }; # $char_checker

  my $impl = Message::DOM::DOMImplementation->new;
  my $doc = $impl->create_document;

  my $context_name = $input->{inner_html_element};
  if (defined $context_name and length $context_name) {
    ## Inner-HTML mode: parse the input as the content of an element.
    $input->{charset} ||= 'utf-8';
    my $source = \($input->{s});
    if (not $input->{is_char_string}) {
      ## Byte input has to be decoded before it is handed to
      ## |set_inner_html|.
      $source = \(Encode::decode ($input->{charset}, $$source));
      $self->result->layer_applicable ('encode');
    }

    my $context = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, $context_name]);
    Whatpm::HTML->set_inner_html
        ($context, $$source, $report_error, $char_checker);

    $self->{structure} = $context;
    $self->{_structure_root} = $doc;
        ## NOTE: Keeping a reference to the document is necessary;
        ## otherwise it would be garbage collected while the element
        ## is still in use, since the element's |owner_document| is
        ## only a weak reference.
  } elsif ($input->{is_char_string}) {
    ## Whole-document mode, input is already a character string.
    Whatpm::HTML->parse_char_string
        ($input->{s} => $doc, $report_error, $char_checker);

    $self->{structure} = $doc;
  } else {
    ## Whole-document mode, input is a byte string.
    $self->result->layer_applicable ('encode');
    Whatpm::HTML->parse_byte_string
        ($input->{charset}, $input->{s} => $doc,
         $report_error, $char_checker);

    $self->{structure} = $doc;
  }

  if (defined $input->{official_charset}) {
    $doc->manakai_charset ($input->{official_charset});
  }

  ## TODO: We need to issue some warning if media type/charset is
  ## explicitly overridden by the user.

  $doc->document_uri ($input->url);
  $doc->manakai_entity_base_uri ($input->{base_uri});

  if ($input->isa ('WebHACC::Input::Text')) {
    $doc->input_encoding (undef);
  }

  $out->end_error_list (role => 'parse-errors');
  $out->end_section;
} # generate_syntax_error_section

## Returns the charset of the source: the |input_encoding| of the
## document that owns the parsed structure (or of the structure
## itself, when it has no owner document), falling back to the
## |{charset}| property of the input when that value is false.
##
## TODO: We need some way to get the source charset reliably.  The
## |input_encoding| DOM attribute might be intentionally left blank
## when the input is the direct input form, but even in that case the
## charset information should be useful because the input string
## might be a byte sequence.  In addition, the |input_encoding| does
## not reflect the fallback encoding in use.  On the contrary, the
## |{charset}| property of the |input| object is always the value
## from the lower-level protocol and that might be ignored by the
## HTML sniffer.
sub source_charset ($) {
  my $self = shift;

  my $node = $self->{structure};
  my $doc = $node->owner_document || $node;

  return ($doc->input_encoding || $self->input->{charset});
} # source_charset

1;