--- test/html-webhacc/cc.cgi	2007/07/17 13:52:54	1.13
+++ test/html-webhacc/cc.cgi	2007/11/23 12:08:32	1.29
@@ -1,13 +1,12 @@
 #!/usr/bin/perl
 use strict;
+use utf8;
 
 use lib qw[/home/httpd/html/www/markup/html/whatpm
-           /home/wakaba/work/manakai/lib
-           /home/wakaba/public_html/-temp/wiki/lib];
+           /home/wakaba/work/manakai2/lib];
 use CGI::Carp qw[fatalsToBrowser];
 use Scalar::Util qw[refaddr];
-
-use SuikaWiki::Input::HTTP; ## TODO: Use some better CGI module
+use Time::HiRes qw/time/;
 
 sub htescape ($) {
   my $s = $_[0];
@@ -21,23 +20,20 @@
   return $s;
 } # htescape
 
-my $http = SuikaWiki::Input::HTTP->new;
-
-## TODO: _charset_
+  use Message::CGI::HTTP;
+  my $http = Message::CGI::HTTP->new;
 
-  if ($http->meta_variable ('PATH_INFO') ne '/') {
+  if ($http->get_meta_variable ('PATH_INFO') ne '/') {
     print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
     exit;
   }
 
   binmode STDOUT, ':utf8';
+  $| = 1;
 
   require Message::DOM::DOMImplementation;
   my $dom = Message::DOM::DOMImplementation->new;
 
-  my $input = get_input_document ($http, $dom);
-  my $inner_html_element = $http->parameter ('e');
-
   load_text_catalog ('en'); ## TODO: conneg
 
   my @nav;
@@ -52,264 +48,92 @@
 
 
+];
+
+  $| = 0;
+  my $input = get_input_document ($http, $dom);
+  my $char_length = 0;
+  my %time;
 
+  print qq[
 
 
 Request URI 
     <@{[htescape $input->{request_uri}]} >Document URI 
-    <@{[htescape $input->{uri}]} ><@{[htescape $input->{uri}]} >
+      yet
   push @nav, ['#document-info' => 'Information'];
 
 if (defined $input->{s}) {
+  $char_length = length $input->{s};
 
   print STDOUT qq[
 
Base URI 
     <@{[htescape $input->{base_uri}]} >Internet Media Type 
     @{[htescape $input->{media_type}]}
-    @{[$input->{media_type_overridden} ? '(overridden) ' : '']}
+    @{[$input->{media_type_overridden} ? '
(overridden) ' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '
(sniffed; official type is: '.htescape ($input->{official_type}).')' : '(sniffed) ']}
 Character Encoding 
     @{[defined $input->{charset} ? ''.htescape ($input->{charset}).'' : '(none)']}
     @{[$input->{charset_overridden} ? '(overridden) ' : '']} 
+Length 
+    $char_length byte@{[$char_length == 1 ? '' : 's']} 
 
   
 ];
 
-  print_http_header_section ($input);
+  my $result = {conforming_min => 1, conforming_max => 1};
+  print_http_header_section ($input, $result);
 
   my $doc;
   my $el;
+  my $manifest;
 
   if ($input->{media_type} eq 'text/html') {
-    require Encode;
-    require Whatpm::HTML;
-
-    $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
-    
-    my $t = Encode::decode ($input->{charset}, $input->{s});
-
-    print STDOUT qq[
-
-
Parse Errors 
-
-
];
-  push @nav, ['#parse-errors' => 'Parse Error'];
-
-  my $onerror = sub {
-    my (%opt) = @_;
-    my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
-    if ($opt{column} > 0) {
-      print STDOUT qq[Line $opt{line}  column $opt{column}Line $opt{line} Description ]];
-    print STDOUT qq[$msg \n];
-  };
-
-  $doc = $dom->create_document;
-  if (defined $inner_html_element and length $inner_html_element) {
-    $el = $doc->create_element_ns
-        ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
-    Whatpm::HTML->set_inner_html ($el, $t, $onerror);
-  } else {
-    Whatpm::HTML->parse_string ($t => $doc, $onerror);
-  }
-
-  print STDOUT qq[ 
-
-
Parse Errors 
-
-
];
-  push @nav, ['#parse-errors' => 'Parse Error'];
-
-  my $onerror = sub {
-    my $err = shift;
-    my $line = $err->location->line_number;
-    print STDOUT qq[Line $line  column ];
-    print STDOUT $err->location->column_number, "";
-    print STDOUT htescape $err->text, " \n";
-    return 1;
-  };
-
-  open my $fh, '<', \($input->{s});
-  $doc = Message::DOM::XMLParserTemp->parse_byte_stream
-      ($fh => $dom, $onerror, charset => $input->{charset});
-
-    print STDOUT qq[ 
-
-
Media type @{[htescape $input->{media_type}]} is not supported! 
-
-
Document Tree 
-];
-    push @nav, ['#document-tree' => 'Tree'];
-
-    print_document_tree ($el || $doc);
-
-    print STDOUT qq[
-
-
-
-
Document Errors 
-
-
];
-    push @nav, ['#document-errors' => 'Document Error'];
-
-    require Whatpm::ContentChecker;
-    my $onerror = sub {
-      my %opt = @_;
-      my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
-      $type =~ tr/ /-/;
-      $type =~ s/\|/%7C/g;
-      $msg .= qq[ [Description ]];
-      print STDOUT qq[] . get_node_link ($opt{node}) .
-          qq[ \n], $msg, " \n";
-    };
-
-    my $elements;
-    if ($el) {
-      $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
-    } else {
-      $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
-    }
-
-    print STDOUT qq[ 
-
-
Tables 
-
-
-
-
-Structure of tables are visualized here if scripting is enabled. 
- 
-];
-
-      my $i = 0;
-      for my $table_el (@{$elements->{table}}) {
-        $i++;
-        print STDOUT qq[
] .
-            get_node_link ($table_el) . q[ ];
-
-        ## TODO: Make |ContentChecker| return |form_table| result
-        ## so that this script don't have to run the algorithm twice.
-        my $table = Whatpm::HTMLTable->form_table ($table_el);
-        
-        for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
-          next unless $_;
-          delete $_->{element};
-        }
-        
-        for (@{$table->{row_group}}) {
-          next unless $_;
-          next unless $_->{element};
-          $_->{type} = $_->{element}->manakai_local_name;
-          delete $_->{element};
-        }
-        
-        for (@{$table->{cell}}) {
-          next unless $_;
-          for (@{$_}) {
-            next unless $_;
-            for (@$_) {
-              $_->{id} = refaddr $_->{element} if defined $_->{element};
-              delete $_->{element};
-              $_->{is_header} = $_->{is_header} ? 1 : 0;
-            }
-          }
-        }
-        
-        print STDOUT '];
-      }
-    
-      print STDOUT qq[
-
Identifiers 
-
-
-];
-      for my $id (sort {$a cmp $b} keys %{$elements->{id}}) {
-        print STDOUT qq[@{[htescape $id]} ];
-        for (@{$elements->{id}->{$id}}) {
-          print STDOUT qq[].get_node_link ($_).qq[ ];
-        }
-      }
-      print STDOUT qq[ 
-
Terms 
-
-
-];
-      for my $term (sort {$a cmp $b} keys %{$elements->{term}}) {
-        print STDOUT qq[@{[htescape $term]} ];
-        for (@{$elements->{term}->{$term}}) {
-          print STDOUT qq[].get_node_link ($_).qq[ ];
-        }
-      }
-      print STDOUT qq[ 
-
Input Error : @{[htescape ($input->{error_status_text})]}
-
+
Parse Errors 
+
+
];
+  push @nav, ['#parse-errors' => 'Parse Error'];
+
+  my $onerror = sub {
+    my (%opt) = @_;
+    my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
+    if ($opt{column} > 0) {
+      print STDOUT qq[Line $opt{line}  column $opt{column}Line $opt{line} Description ]];
+    print STDOUT qq[], get_error_level_label (\%opt);
+    print STDOUT qq[$msg \n];
+
+    add_error ('syntax', \%opt => $result);
+  };
+
+  my $doc = $dom->create_document;
+  my $el;
+  my $inner_html_element = $http->get_parameter ('e');
+  if (defined $inner_html_element and length $inner_html_element) {
+    $input->{charset} ||= 'windows-1252'; ## TODO: for now.
+    my $time1 = time;
+    my $t = Encode::decode ($input->{charset}, $input->{s});
+    $time{decode} = time - $time1;
+    
+    $el = $doc->create_element_ns
+        ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
+    $time1 = time;
+    Whatpm::HTML->set_inner_html ($el, $t, $onerror);
+    $time{parse} = time - $time1;
+  } else {
+    my $time1 = time;
+    Whatpm::HTML->parse_byte_string
+        ($input->{charset}, $input->{s} => $doc, $onerror);
+    $time{parse_html} = time - $time1;
+  }
+  $doc->manakai_charset ($input->{official_charset})
+      if defined $input->{official_charset};
+  
+  print STDOUT qq[ 
+
Parse Errors 
+
+
];
+  push @nav, ['#parse-errors' => 'Parse Error'];
+
+  my $onerror = sub {
+    my $err = shift;
+    my $line = $err->location->line_number;
+    print STDOUT qq[Line $line  column ];
+    print STDOUT $err->location->column_number, "";
+    print STDOUT htescape $err->text, " \n";
+
+    add_error ('syntax', {type => $err->text,
+                level => [
+                          $err->SEVERITY_FATAL_ERROR => 'm',
+                          $err->SEVERITY_ERROR => 'm',
+                          $err->SEVERITY_WARNING => 's',
+                         ]->[$err->severity]} => $result);
+
+    return 1;
+  };
+
+  my $time1 = time;
+  open my $fh, '<', \($input->{s});
+  my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
+      ($fh => $dom, $onerror, charset => $input->{charset});
+  $time{parse_xml} = time - $time1;
+  $doc->manakai_charset ($input->{official_charset})
+      if defined $input->{official_charset};
+
+  print STDOUT qq[ 
+
Parse Errors 
+
+
];
+  push @nav, ['#parse-errors' => 'Parse Error'];
+
+  my $onerror = sub {
+    my (%opt) = @_;
+    my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
+    print STDOUT qq[], get_error_label (\%opt), qq[ ];
+    $type =~ tr/ /-/;
+    $type =~ s/\|/%7C/g;
+    $msg .= qq[ [Description ]];
+    print STDOUT qq[], get_error_level_label (\%opt);
+    print STDOUT qq[$msg \n];
+
+    add_error ('syntax', \%opt => $result);
+  };
+
+  my $time1 = time;
+  my $manifest = Whatpm::CacheManifest->parse_byte_string
+      ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
+  $time{parse_manifest} = time - $time1;
+
+  print STDOUT qq[ ';
         for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
                       @{$child->attributes}) {
-          $r .= qq[] . htescape ($attr->[0]) . ' = '; ## ISSUE: case?
+          $r .= qq[] . htescape ($attr->[0]) . ' = '; ## ISSUE: case?
           $r .= '' . htescape ($attr->[1]) . '  ';
@@ -429,6 +424,21 @@
     } elsif ($nt == $child->DOCUMENT_NODE) {
       $r .= qq'Document';
       $r .= qq[];
+      my $cp = $child->manakai_charset;
+      if (defined $cp) {
+        $r .= qq[charset parameter = ];
+        $r .= htescape ($cp) . qq[inputEncoding = ];
+      my $ie = $child->input_encoding;
+      if (defined $ie) {
+        $r .= qq[@{[htescape ($ie)]}];
+        if ($child->manakai_has_bom) {
+          $r .= qq[ (with BOM null)];
+      }
       $r .= qq[@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]} ];
       $r .= qq[@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]} ];
       unless ($child->manakai_is_html) {
@@ -462,6 +472,411 @@
   print STDOUT $r;
 } # print_document_tree
 
+sub print_structure_dump_dom_section ($$) {
+  my ($doc, $el) = @_;
+
+  print STDOUT qq[
+
+
Document Tree 
+];
+  push @nav, ['#document-tree' => 'Tree'];
+
+  print_document_tree ($el || $doc);
+
+  print STDOUT qq[];
+} # print_structure_dump_dom_section
+
+sub print_structure_dump_manifest_section ($) {
+  my $manifest = shift;
+
+  print STDOUT qq[
+
+
Cache Manifest 
+];
+  push @nav, ['#dump-manifest' => 'Caceh Manifest'];
+
+  print STDOUT qq[
Explicit entries ];
+  for my $uri (@{$manifest->[0]}) {
+    my $euri = htescape ($uri);
+    print STDOUT qq[<$euri >Fallback entries 
+      Oppotunistic Caching Namespace 
+      Fallback Entry ];
+  for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
+    my $euri = htescape ($uri);
+    my $euri2 = htescape ($manifest->[1]->{$uri});
+    print STDOUT qq[<$euri ><$euri2 > 
 Online whitelist ];
+  for my $uri (@{$manifest->[2]}) {
+    my $euri = htescape ($uri);
+    print STDOUT qq[<$euri >
+
Document Errors 
+
+
];
+  push @nav, ['#document-errors' => 'Document Error'];
+
+  require Whatpm::ContentChecker;
+  my $onerror = sub {
+    my %opt = @_;
+    my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
+    $type =~ tr/ /-/;
+    $type =~ s/\|/%7C/g;
+    $msg .= qq[ [Description ]];
+    print STDOUT qq[] . get_error_label (\%opt) .
+        qq[ \n], get_error_level_label (\%opt);
+    print STDOUT $msg, " \n";
+    add_error ('structure', \%opt => $result);
+  };
+
+  my $elements;
+  my $time1 = time;
+  if ($el) {
+    $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
+  } else {
+    $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
+  }
+  $time{check} = time - $time1;
+
+  print STDOUT qq[ 
+
Document Errors 
+
+
];
+  push @nav, ['#document-errors' => 'Document Error'];
+
+  require Whatpm::CacheManifest;
+  Whatpm::CacheManifest->check_manifest ($manifest, sub {
+    my %opt = @_;
+    my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
+    $type =~ tr/ /-/;
+    $type =~ s/\|/%7C/g;
+    $msg .= qq[ [Description ]];
+    print STDOUT qq[] . get_error_label (\%opt) .
+        qq[ \n], $msg, " \n";
+    add_error ('structure', \%opt => $result);
+  });
+
+  print STDOUT qq[ 
+
Tables 
+
+
+
+
+Structure of tables are visualized here if scripting is enabled. 
+ 
+];
+  
+  require JSON;
+  
+  my $i = 0;
+  for my $table_el (@$tables) {
+    $i++;
+    print STDOUT qq[
] .
+        get_node_link ($table_el) . q[ ];
+
+    ## TODO: Make |ContentChecker| return |form_table| result
+    ## so that this script don't have to run the algorithm twice.
+    my $table = Whatpm::HTMLTable->form_table ($table_el);
+    
+    for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
+      next unless $_;
+      delete $_->{element};
+    }
+    
+    for (@{$table->{row_group}}) {
+      next unless $_;
+      next unless $_->{element};
+      $_->{type} = $_->{element}->manakai_local_name;
+      delete $_->{element};
+    }
+    
+    for (@{$table->{cell}}) {
+      next unless $_;
+      for (@{$_}) {
+        next unless $_;
+        for (@$_) {
+          $_->{id} = refaddr $_->{element} if defined $_->{element};
+          delete $_->{element};
+          $_->{is_header} = $_->{is_header} ? 1 : 0;
+        }
+      }
+    }
+        
+    print STDOUT '];
+  }
+  
+  print STDOUT qq[
+
Identifiers 
+
+
+];
+  for my $id (sort {$a cmp $b} keys %$ids) {
+    print STDOUT qq[@{[htescape $id]}].get_node_link ($_).qq[ ];
+    }
+  }
+  print STDOUT qq[ 
+
Terms 
+
+
+];
+  for my $term (sort {$a cmp $b} keys %$terms) {
+    print STDOUT qq[@{[htescape $term]} ];
+    for (@{$terms->{$term}}) {
+      print STDOUT qq[].get_node_link ($_).qq[ ];
+    }
+  }
+  print STDOUT qq[ 
+
Classes 
+
+
+];
+  for my $class (sort {$a cmp $b} keys %$classes) {
+    print STDOUT qq[@{[htescape $class]}].get_node_link ($_).qq[ ];
+    }
+  }
+  print STDOUT qq[ 
+
Result ];
+
+  if ($result->{unsupported} and $result->{conforming_max}) {  
+    print STDOUT qq[
The conformance
+        checker cannot decide whether the document is conforming or
+        not, since the document contains one or more unsupported
+        features.  The document might or might not be conforming.
];
+  } elsif ($result->{conforming_min}) {
+    print STDOUT qq[
No conformance-error is
+        found in this document.
];
+  } elsif ($result->{conforming_max}) {
+    print STDOUT qq[
This document
+        is likely non -conforming , but in rare case
+        it might be conforming.
];
+  } else {
+    print STDOUT qq[
This document is 
+        non -conforming
];
+  }
+
+  print STDOUT qq[
+
+MUST ‐level
+ErrorsSHOULD ‐level
+ErrorsWarnings Score  ];
+
+  my $must_error = 0;
+  my $should_error = 0;
+  my $warning = 0;
+  my $score_min = 0;
+  my $score_max = 0;
+  my $score_base = 20;
+  my $score_unit = $score_base / 100;
+  for (
+    [Transfer => 'transfer', ''],
+    [Character => 'char', ''],
+    [Syntax => 'syntax', '#parse-errors'],
+    [Structure => 'structure', '#document-errors'],
+  ) {
+    $must_error += ($result->{$_->[1]}->{must} += 0);
+    $should_error += ($result->{$_->[1]}->{should} += 0);
+    $warning += ($result->{$_->[1]}->{warning} += 0);
+    $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
+    $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
+
+    my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
+    my $label = $_->[0];
+    if ($result->{$_->[1]}->{must} or
+        $result->{$_->[1]}->{should} or
+        $result->{$_->[1]}->{warning} or
+        $result->{$_->[1]}->{unsupported}) {
+      $label = qq[$label ];
+    }
+
+    print STDOUT qq[$label $result->{$_->[1]}->{must}$uncertain $result->{$_->[1]}->{should}$uncertain $result->{$_->[1]}->{warning}$uncertain ];
+    if ($uncertain) {
+      print qq[−∞..$result->{$_->[1]}->{score_max} ];
+    } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
+      print qq[$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max} $result->{$_->[1]}->{score_min} ];
+    }
+  }
+
+  $score_max += $score_base;
+
+  print STDOUT qq[
+Semantics 0? 0? 0? −∞..$score_base  
+Total 
+$must_error? 
+$should_error? 
+$warning? 
+−∞..$score_max 
+
+
Important : This conformance checking service
+is under development .  The result above might be wrong .
+
+
Errors 
+
+
+<$euri >Not
+        supported @{[htescape $input->{media_type}]}
+    is not supported. 
+
+
Input Error : @{[htescape ($input->{error_status_text})]}
+
Line $err->{line}  column $err->{column}];
+    } else {
+      $err->{line} = $err->{line} - 1 || 1;
+      $r = qq[Line $err->{line} ];
+    }
+  }
+
+  if (defined $err->{node}) {
+    $r .= ' ' if length $r;
+    $r = get_node_link ($err->{node});
+  }
+
+  if (defined $err->{index}) {
+    $r .= ' ' if length $r;
+    $r .= 'Index ' . (0+$err->{index});
+  }
+
+  if (defined $err->{value}) {
+    $r .= ' ' if length $r;
+    $r .= '' . htescape ($err->{value}) . 'MUST ‐level
+        errorSHOULD ‐level
+        errorWarning Not
+        supported $elevel \$([0-9]+) }{
         defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
       }ge;
-      return ($type, $Msg->{$type}->[0], $msg);
+      $msg =~ s{{\@([A-Za-z0-9:_.-]+)} }{
+        UNIVERSAL::can ($node, 'get_attribute_ns')
+            ? htescape ($node->get_attribute_ns (undef, $1)) : ''
+      }ge;
+      $msg =~ s{{\@} }{
+        UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
+      }ge;
+      $msg =~ s{{local-name} }{
+        UNIVERSAL::can ($node, 'manakai_local_name')
+          ? htescape ($node->manakai_local_name) : ''
+      }ge;
+      $msg =~ s{{element-local-name} }{
+        (UNIVERSAL::can ($node, 'owner_element') and
+         $node->owner_element)
+          ? htescape ($node->owner_element->manakai_local_name)
+          : ''
+      }ge;
+      return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
     } elsif ($type =~ s/:([^:]*)$//) {
       unshift @arg, $1;
       redo;
     }
   }
-  return ($type, '', htescape ($_[0]));
+  return ($type, 'level-'.$level, htescape ($_[0]));
 } # get_text
 
 }
@@ -533,7 +967,7 @@
 sub get_input_document ($$) {
   my ($http, $dom) = @_;
 
-  my $request_uri = $http->parameter ('uri');
+  my $request_uri = $http->get_parameter ('uri');
   my $r = {};
   if (defined $request_uri and length $request_uri) {
     my $uri = $dom->create_uri_reference ($request_uri);
@@ -581,30 +1015,43 @@
     $ua->protocols_allowed ([qw/http/]);
     $ua->max_size (1000_000);
     my $req = HTTP::Request->new (GET => $request_uri);
+    $req->header ('Accept-Encoding' => 'identity, *; q=0');
     my $res = $ua->request ($req);
-    if ($res->is_success or $http->parameter ('error-page')) {
+    ## TODO: 401 sets |is_success| true.
+    if ($res->is_success or $http->get_parameter ('error-page')) {
       $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and