--- test/html-webhacc/cc.cgi 2007/11/18 05:30:03 1.25 +++ test/html-webhacc/cc.cgi 2008/02/10 02:30:14 1.32 @@ -52,7 +52,6 @@ $| = 0; my $input = get_input_document ($http, $dom); - my $inner_html_element = $http->get_parameter ('e'); my $char_length = 0; my %time; @@ -90,47 +89,7 @@ ]; my $result = {conforming_min => 1, conforming_max => 1}; - print_http_header_section ($input, $result); - - my $doc; - my $el; - my $manifest; - - if ($input->{media_type} eq 'text/html') { - ($doc, $el) = print_syntax_error_html_section ($input, $result); - print_source_string_section - (\($input->{s}), $input->{charset} || $doc->input_encoding); - } elsif ({ - 'text/xml' => 1, - 'application/atom+xml' => 1, - 'application/rss+xml' => 1, - 'application/svg+xml' => 1, - 'application/xhtml+xml' => 1, - 'application/xml' => 1, - }->{$input->{media_type}}) { - ($doc, $el) = print_syntax_error_xml_section ($input, $result); - print_source_string_section (\($input->{s}), $doc->input_encoding); - } elsif ($input->{media_type} eq 'text/cache-manifest') { -## TODO: MUST be text/cache-manifest - $manifest = print_syntax_error_manifest_section ($input, $result); - print_source_string_section (\($input->{s}), 'utf-8'); - } else { - ## TODO: Change HTTP status code?? - print_result_unknown_type_section ($input, $result); - } - - if (defined $doc or defined $el) { - print_structure_dump_dom_section ($doc, $el); - my $elements = print_structure_error_dom_section ($doc, $el, $result); - print_table_section ($elements->{table}) if @{$elements->{table}}; - print_id_section ($elements->{id}) if keys %{$elements->{id}}; - print_term_section ($elements->{term}) if keys %{$elements->{term}}; - print_class_section ($elements->{class}) if keys %{$elements->{class}}; - } elsif (defined $manifest) { - print_structure_dump_manifest_section ($manifest); - print_structure_error_manifest_section ($manifest, $result); - } - + check_and_print ($input => $result); print_result_section ($result); } else { print STDOUT qq[]; @@ -167,7 +126,7 @@ $result->{conforming_min} = 0; } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') { $result->{$layer}->{warning}++; - } elsif ($err->{level} eq 'unsupported') { + } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') { $result->{$layer}->{unsupported}++; $result->{unsupported} = 1; } else { @@ -186,14 +145,62 @@ } } # add_error +sub check_and_print ($$) { + my ($input, $result) = @_; + $input->{id_prefix} = ''; + #$input->{nested} = 1/0; + + print_http_header_section ($input, $result); + + my $doc; + my $el; + my $manifest; + + if ($input->{media_type} eq 'text/html') { + ($doc, $el) = print_syntax_error_html_section ($input, $result); + print_source_string_section + (\($input->{s}), $input->{charset} || $doc->input_encoding); + } elsif ({ + 'text/xml' => 1, + 'application/atom+xml' => 1, + 'application/rss+xml' => 1, + 'application/svg+xml' => 1, + 'application/xhtml+xml' => 1, + 'application/xml' => 1, + }->{$input->{media_type}}) { + ($doc, $el) = print_syntax_error_xml_section ($input, $result); + print_source_string_section (\($input->{s}), $doc->input_encoding); + } elsif ($input->{media_type} eq 'text/cache-manifest') { +## TODO: MUST be text/cache-manifest + $manifest = print_syntax_error_manifest_section ($input, $result); + print_source_string_section (\($input->{s}), 'utf-8'); + } else { + ## TODO: Change HTTP status code?? + print_result_unknown_type_section ($input, $result); + } + + if (defined $doc or defined $el) { + print_structure_dump_dom_section ($input, $doc, $el); + my $elements = print_structure_error_dom_section + ($input, $doc, $el, $result); + print_table_section ($input, $elements->{table}) if @{$elements->{table}}; + print_id_section ($input, $elements->{id}) if keys %{$elements->{id}}; + print_term_section ($input, $elements->{term}) if keys %{$elements->{term}}; + print_class_section ($input, $elements->{class}) if keys %{$elements->{class}}; + } elsif (defined $manifest) { + print_structure_dump_manifest_section ($input, $manifest); + print_structure_error_manifest_section ($input, $manifest, $result); + } +} # check_and_print + sub print_http_header_section ($$) { my ($input, $result) = @_; return unless defined $input->{header_status_code} or defined $input->{header_status_text} or @{$input->{header_field}}; - push @nav, ['#source-header' => 'HTTP Header']; - print STDOUT qq[
+ push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested}; + print STDOUT qq[

HTTP Header

Note: Due to the limitation of the @@ -227,11 +234,11 @@ require Whatpm::HTML; print STDOUT qq[ -

+

Parse Errors

]; - push @nav, ['#parse-errors' => 'Parse Error']; + push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested}; my $onerror = sub { my (%opt) = @_; @@ -253,8 +260,9 @@ my $doc = $dom->create_document; my $el; + my $inner_html_element = $http->get_parameter ('e'); if (defined $inner_html_element and length $inner_html_element) { - $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now. + $input->{charset} ||= 'windows-1252'; ## TODO: for now. my $time1 = time; my $t = Encode::decode ($input->{charset}, $input->{s}); $time{decode} = time - $time1; @@ -270,6 +278,8 @@ ($input->{charset}, $input->{s} => $doc, $onerror); $time{parse_html} = time - $time1; } + $doc->manakai_charset ($input->{official_charset}) + if defined $input->{official_charset}; print STDOUT qq[
]; @@ -282,11 +292,11 @@ require Message::DOM::XMLParserTemp; print STDOUT qq[ -
+

Parse Errors

]; - push @nav, ['#parse-errors' => 'Parse Error']; + push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix}; my $onerror = sub { my $err = shift; @@ -310,6 +320,8 @@ my $doc = Message::DOM::XMLParserTemp->parse_byte_stream ($fh => $dom, $onerror, charset => $input->{charset}); $time{parse_xml} = time - $time1; + $doc->manakai_charset ($input->{official_charset}) + if defined $input->{official_charset}; print STDOUT qq[
]; @@ -322,16 +334,17 @@ require Whatpm::CacheManifest; print STDOUT qq[ -
+

Parse Errors

]; - push @nav, ['#parse-errors' => 'Parse Error']; + push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested}; my $onerror = sub { my (%opt) = @_; my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}); - print STDOUT qq[
], get_error_label (\%opt), qq[
]; + print STDOUT qq[
], get_error_label ($input, \%opt), + qq[
]; $type =~ tr/ /-/; $type =~ s/\|/%7C/g; $msg .= qq[ [Description]]; @@ -358,20 +371,22 @@ my $s = \($enc->decode (${$_[0]})); my $i = 1; - push @nav, ['#source-string' => 'Source']; - print STDOUT qq[
+ push @nav, ['#source-string' => 'Source'] unless $input->{nested}; + print STDOUT qq[

Document Source

    \n]; if (length $$s) { while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) { - print STDOUT qq[
  1. ], htescape $1, "
  2. \n"; + print STDOUT qq[
  3. ], htescape $1, + "
  4. \n"; $i++; } if ($$s =~ /\G([^\x0A]+)/gc) { - print STDOUT qq[
  5. ], htescape $1, "
  6. \n"; + print STDOUT qq[
  7. ], htescape $1, + "
  8. \n"; } } else { - print STDOUT q[
  9. ]; + print STDOUT q[
  10. ]; } print STDOUT "
"; } # print_input_string_section @@ -388,7 +403,7 @@ next; } - my $node_id = 'node-'.refaddr $child; + my $node_id = $input->{id_prefix} . 'node-'.refaddr $child; my $nt = $child->node_type; if ($nt == $child->ELEMENT_NODE) { my $child_nsuri = $child->namespace_uri; @@ -399,7 +414,7 @@ $r .= '
    '; for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] } @{$child->attributes}) { - $r .= qq[
  • ] . htescape ($attr->[0]) . ' = '; ## ISSUE: case? + $r .= qq[
  • ] . htescape ($attr->[0]) . ' = '; ## ISSUE: case? $r .= '' . htescape ($attr->[1]) . '
  • '; ## TODO: children } $r .= '
'; @@ -420,6 +435,21 @@ } elsif ($nt == $child->DOCUMENT_NODE) { $r .= qq'
  • Document'; $r .= qq[
      ]; + my $cp = $child->manakai_charset; + if (defined $cp) { + $r .= qq[
    • charset parameter = ]; + $r .= htescape ($cp) . qq[
    • ]; + } + $r .= qq[
    • inputEncoding = ]; + my $ie = $child->input_encoding; + if (defined $ie) { + $r .= qq[@{[htescape ($ie)]}]; + if ($child->manakai_has_bom) { + $r .= qq[ (with BOM)]; + } + } else { + $r .= qq[(null)]; + } $r .= qq[
    • @{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}
    • ]; $r .= qq[
    • @{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}
    • ]; unless ($child->manakai_is_html) { @@ -453,28 +483,28 @@ print STDOUT $r; } # print_document_tree -sub print_structure_dump_dom_section ($$) { - my ($doc, $el) = @_; +sub print_structure_dump_dom_section ($$$) { + my ($input, $doc, $el) = @_; print STDOUT qq[ -
      +

      Document Tree

      ]; - push @nav, ['#document-tree' => 'Tree']; + push @nav, ['#document-tree' => 'Tree'] unless $input->{nested}; print_document_tree ($el || $doc); print STDOUT qq[
      ]; } # print_structure_dump_dom_section -sub print_structure_dump_manifest_section ($) { - my $manifest = shift; +sub print_structure_dump_manifest_section ($$) { + my ($input, $manifest) = @_; print STDOUT qq[ -
      +

      Cache Manifest

      ]; - push @nav, ['#dump-manifest' => 'Caceh Manifest']; + push @nav, ['#dump-manifest' => 'Caceh Manifest'] unless $input->{nested}; print STDOUT qq[
      Explicit entries
      ]; for my $uri (@{$manifest->[0]}) { @@ -501,14 +531,14 @@ print STDOUT qq[
      ]; } # print_structure_dump_manifest_section -sub print_structure_error_dom_section ($$$) { - my ($doc, $el, $result) = @_; +sub print_structure_error_dom_section ($$$$) { + my ($input, $doc, $el, $result) = @_; - print STDOUT qq[
      + print STDOUT qq[

      Document Errors

      ]; - push @nav, ['#document-errors' => 'Document Error']; + push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested}; require Whatpm::ContentChecker; my $onerror = sub { @@ -517,7 +547,7 @@ $type =~ tr/ /-/; $type =~ s/\|/%7C/g; $msg .= qq[ [Description]]; - print STDOUT qq[
      ] . get_error_label (\%opt) . + print STDOUT qq[
      ] . get_error_label ($input, \%opt) . qq[
      \n
      ], get_error_level_label (\%opt); print STDOUT $msg, "
      \n"; add_error ('structure', \%opt => $result); @@ -538,13 +568,13 @@ } # print_structure_error_dom_section sub print_structure_error_manifest_section ($$$) { - my ($manifest, $result) = @_; + my ($input, $manifest, $result) = @_; - print STDOUT qq[
      + print STDOUT qq[

      Document Errors

      ]; - push @nav, ['#document-errors' => 'Document Error']; + push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested}; require Whatpm::CacheManifest; Whatpm::CacheManifest->check_manifest ($manifest, sub { @@ -553,7 +583,7 @@ $type =~ tr/ /-/; $type =~ s/\|/%7C/g; $msg .= qq[ [Description]]; - print STDOUT qq[
      ] . get_error_label (\%opt) . + print STDOUT qq[
      ] . get_error_label ($input, \%opt) . qq[
      \n
      ], $msg, "
      \n"; add_error ('structure', \%opt => $result); }); @@ -561,12 +591,12 @@ print STDOUT qq[
      ]; } # print_structure_error_manifest_section -sub print_table_section ($) { - my $tables = shift; +sub print_table_section ($$) { + my ($input, $tables) = @_; - push @nav, ['#tables' => 'Tables']; + push @nav, ['#tables' => 'Tables'] unless $input->{nested}; print STDOUT qq[ -
      +

      Tables

      @@ -581,8 +611,8 @@ my $i = 0; for my $table_el (@$tables) { $i++; - print STDOUT qq[

      ] . - get_node_link ($table_el) . q[

      ]; + print STDOUT qq[

      ] . + get_node_link ($input, $table_el) . q[

      ]; ## TODO: Make |ContentChecker| return |form_table| result ## so that this script don't have to run the algorithm twice. @@ -614,18 +644,19 @@ print STDOUT '
      ]; + print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')]; + print STDOUT qq[, '$input->{id_prefix}');]; } print STDOUT qq[
      ]; } # print_table_section -sub print_id_section ($) { - my $ids = shift; +sub print_id_section ($$) { + my ($input, $ids) = @_; - push @nav, ['#identifiers' => 'IDs']; + push @nav, ['#identifiers' => 'IDs'] unless $input->{nested}; print STDOUT qq[ -
      +

      Identifiers

      @@ -633,18 +664,18 @@ for my $id (sort {$a cmp $b} keys %$ids) { print STDOUT qq[
      @{[htescape $id]}
      ]; for (@{$ids->{$id}}) { - print STDOUT qq[
      ].get_node_link ($_).qq[
      ]; + print STDOUT qq[
      ].get_node_link ($input, $_).qq[
      ]; } } print STDOUT qq[
      ]; } # print_id_section -sub print_term_section ($) { - my $terms = shift; +sub print_term_section ($$) { + my ($input, $terms) = @_; - push @nav, ['#terms' => 'Terms']; + push @nav, ['#terms' => 'Terms'] unless $input->{nested}; print STDOUT qq[ -
      +

      Terms

      @@ -652,18 +683,18 @@ for my $term (sort {$a cmp $b} keys %$terms) { print STDOUT qq[
      @{[htescape $term]}
      ]; for (@{$terms->{$term}}) { - print STDOUT qq[
      ].get_node_link ($_).qq[
      ]; + print STDOUT qq[
      ].get_node_link ($input, $_).qq[
      ]; } } print STDOUT qq[
      ]; } # print_term_section -sub print_class_section ($) { - my $classes = shift; +sub print_class_section ($$) { + my ($input, $classes) = @_; - push @nav, ['#classes' => 'Classes']; + push @nav, ['#classes' => 'Classes'] unless $input->{nested}; print STDOUT qq[ -
      +

      Classes

      @@ -671,7 +702,7 @@ for my $class (sort {$a cmp $b} keys %$classes) { print STDOUT qq[
      @{[htescape $class]}
      ]; for (@{$classes->{$class}}) { - print STDOUT qq[
      ].get_node_link ($_).qq[
      ]; + print STDOUT qq[
      ].get_node_link ($input, $_).qq[
      ]; } } print STDOUT qq[
      ]; @@ -787,9 +818,9 @@
      ]; push @nav, ['#parse-errors' => 'Errors']; - add_error (char => {level => 'unsupported'} => $result); - add_error (syntax => {level => 'unsupported'} => $result); - add_error (structure => {level => 'unsupported'} => $result); + add_error (char => {level => 'u'} => $result); + add_error (syntax => {level => 'u'} => $result); + add_error (structure => {level => 'u'} => $result); } # print_result_unknown_type_section sub print_result_input_error_section ($) { @@ -798,10 +829,10 @@

      Input Error: @{[htescape ($input->{error_status_text})]}

      ]; push @nav, ['#result-summary' => 'Result']; -} # print_Result_input_error_section +} # print_result_input_error_section -sub get_error_label ($) { - my $err = shift; +sub get_error_label ($$) { + my ($input, $err) = @_; my $r = ''; @@ -816,7 +847,7 @@ if (defined $err->{node}) { $r .= ' ' if length $r; - $r = get_node_link ($err->{node}); + $r = get_node_link ($input, $err->{node}); } if (defined $err->{index}) { @@ -846,7 +877,7 @@ } elsif ($err->{level} eq 'w') { $r = qq[Warning: ]; - } elsif ($err->{level} eq 'unsupported') { + } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') { $r = qq[Not supported: ]; } else { @@ -885,9 +916,9 @@ return join '/', @r; } # get_node_path -sub get_node_link ($) { - return qq[] . - htescape (get_node_path ($_[0])) . qq[]; +sub get_node_link ($$) { + return qq[] . + htescape (get_node_path ($_[1])) . qq[]; } # get_node_link { @@ -895,7 +926,8 @@ sub load_text_catalog ($) { my $lang = shift; # MUST be a canonical lang name - open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!"; + open my $file, '<:utf8', "cc-msg.$lang.txt" + or die "$0: cc-msg.$lang.txt: $!"; while (<$file>) { if (s/^([^;]+);([^;]*);//) { my ($type, $cls, $msg) = ($1, $2, $_); @@ -908,6 +940,7 @@ sub get_text ($) { my ($type, $level, $node) = @_; $type = $level . ':' . $type if defined $level; + $level = 'm' unless defined $level; my @arg; { if (defined $Msg->{$type}) { @@ -932,13 +965,13 @@ ? htescape ($node->owner_element->manakai_local_name) : '' }ge; - return ($type, $Msg->{$type}->[0], $msg); + return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg); } elsif ($type =~ s/:([^:]*)$//) { unshift @arg, $1; redo; } } - return ($type, '', htescape ($_[0])); + return ($type, 'level-'.$level, htescape ($_[0])); } # get_text } @@ -994,6 +1027,7 @@ $ua->protocols_allowed ([qw/http/]); $ua->max_size (1000_000); my $req = HTTP::Request->new (GET => $request_uri); + $req->header ('Accept-Encoding' => 'identity, *; q=0'); my $res = $ua->request ($req); ## TODO: 401 sets |is_success| true. if ($res->is_success or $http->get_parameter ('error-page')) { @@ -1006,6 +1040,7 @@ if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) { $r->{charset} = lc $1; $r->{charset} =~ tr/\\//d; + $r->{official_charset} = $r->{charset}; } my $input_charset = $http->get_parameter ('charset'); @@ -1049,6 +1084,7 @@ $r->{charset} = ''.$http->get_parameter ('_charset_'); $r->{charset} =~ s/\s+//g; $r->{charset} = 'utf-8' if $r->{charset} eq ''; + $r->{official_charset} = $r->{charset}; $r->{header_field} = []; require Whatpm::ContentType; @@ -1076,6 +1112,7 @@ if ($r->{media_type} eq 'text/xml') { unless (defined $r->{charset}) { $r->{charset} = 'us-ascii'; + $r->{official_charset} = $r->{charset}; } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') { $r->{charset_overridden} = 0; } @@ -1126,4 +1163,4 @@ =cut -## $Date: 2007/11/18 05:30:03 $ +## $Date: 2008/02/10 02:30:14 $