Web Document Conformance Checker (BETA)

Web Document Conformance Checker +(beta)

Tables

+ + + + +]; + + require JSON; + + my $i = 0; + for my $table (@$tables) { + $i++; + print STDOUT qq[

] . + get_node_link ($input, $table->{element}) . q[

]; + + delete $table->{element}; + + for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}, + @{$table->{row}}) { + next unless $_; + delete $_->{element}; + } + + for (@{$table->{row_group}}) { + next unless $_; + next unless $_->{element}; + $_->{type} = $_->{element}->manakai_local_name; + delete $_->{element}; + } + + for (@{$table->{cell}}) { + next unless $_; + for (@{$_}) { + next unless $_; + for (@$_) { + $_->{id} = refaddr $_->{element} if defined $_->{element}; + delete $_->{element}; + $_->{is_header} = $_->{is_header} ? 1 : 0; + } } - - unshift @node, - map { [$_, $child->[1] . ' '] } @{$child->[0]->child_nodes}; - } elsif ($nt == $child->[0]->TEXT_NODE) { - $r .= '| ' . $child->[1] . '"' . $child->[0]->data . '"' . "\x0A"; - } elsif ($nt == $child->[0]->CDATA_SECTION_NODE) { - $r .= '| ' . $child->[1] . '[0]->data . "]]>\x0A"; - } elsif ($nt == $child->[0]->COMMENT_NODE) { - $r .= '| ' . $child->[1] . '\x0A"; - } elsif ($nt == $child->[0]->DOCUMENT_TYPE_NODE) { - $r .= '| ' . $child->[1] . '[0]->name . ">\x0A"; - } elsif ($nt == $child->[0]->PROCESSING_INSTRUCTION_NODE) { - $r .= '| ' . $child->[1] . '[0]->target . ' ' . - $child->[0]->data . "?>\x0A"; - } else { - $r .= '| ' . $child->[1] . $child->[0]->node_type . "\x0A"; # error } + + print STDOUT '

]; } - return \$r; -} # test_serialize + print STDOUT qq[

$opt->{heading}

+ +

@{[htescape $id]}: ].get_node_link ($input, $_).qq[

RDF Triples

+ +

]; + print STDOUT get_node_link ($input, $rdf->[0]); + print STDOUT qq[

' . get_node_link ($input, $triple->[0]) . ': '; + print STDOUT get_rdf_resource_html ($triple->[1]); + print STDOUT ' '; + print STDOUT get_rdf_resource_html ($triple->[2]); + print STDOUT ' '; + print STDOUT get_rdf_resource_html ($triple->[3]); + } + print STDOUT qq[

]; + } + print STDOUT qq[

] . htescape ($resource->{value}) . '

\$([0-9]+)

{\@([A-Za-z0-9:_.-]+)}

{\@}

{local-name}

{element-local-name}

check ($uri->uri_host, $uri->uri_port || 80)) { + my $r = WebHACC::Input::Error->new; + $r->{uri} = $request_uri; + $r->{request_uri} = $request_uri; + $r->{error_status_text} = 'Connection to the host is forbidden'; + return $r; + } + + require LWP::UserAgent; + my $ua = WDCC::LWPUA->new; + $ua->{wdcc_dom} = $dom; + $ua->{wdcc_host_permit} = $host_permit; + $ua->agent ('Mozilla'); ## TODO: for now. + $ua->parse_head (0); + $ua->protocols_allowed ([qw/http/]); + $ua->max_size (1000_000); + my $req = HTTP::Request->new (GET => $request_uri); + $req->header ('Accept-Encoding' => 'identity, *; q=0'); + my $res = $ua->request ($req); + ## TODO: 401 sets |is_success| true. + if ($res->is_success or $http->get_parameter ('error-page')) { + $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and . ## TODO: Use our own code! + $r->{uri} = $res->request->uri; + $r->{request_uri} = $request_uri; + + ## TODO: More strict parsing... + my $ct = $res->header ('Content-Type'); + if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) { + $r->{charset} = lc $1; + $r->{charset} =~ tr/\\//d; + $r->{official_charset} = $r->{charset}; + } + + my $input_charset = $http->get_parameter ('charset'); + if (defined $input_charset and length $input_charset) { + $r->{charset_overridden} + = (not defined $r->{charset} or $r->{charset} ne $input_charset); + $r->{charset} = $input_charset; + } + + ## TODO: Support for HTTP Content-Encoding + + $r->{s} = ''.$res->content; + + require Whatpm::ContentType; + ($r->{official_type}, $r->{media_type}) + = Whatpm::ContentType->get_sniffed_type + (get_file_head => sub { + return substr $r->{s}, 0, shift; + }, + http_content_type_byte => $ct, + has_http_content_encoding => + defined $res->header ('Content-Encoding'), + supported_image_types => {}); } else { - $rs = '#' . $node->node_type; - $node = $node->parent_node; + $r->{uri} = $res->request->uri; + $r->{request_uri} = $request_uri; + $r->{error_status_text} = $res->status_line; } - unshift @r, $rs; + + $r->{header_field} = []; + $res->scan (sub { + push @{$r->{header_field}}, [$_[0], $_[1]]; + }); + $r->{header_status_code} = $res->code; + $r->{header_status_text} = $res->message; + } else { + $r->{s} = ''.$http->get_parameter ('s'); + $r->{uri} = q; + $r->{request_uri} = q; + $r->{base_uri} = q; + $r->{charset} = ''.$http->get_parameter ('_charset_'); + $r->{charset} =~ s/\s+//g; + $r->{charset} = 'utf-8' if $r->{charset} eq ''; + $r->{official_charset} = $r->{charset}; + $r->{header_field} = []; + + require Whatpm::ContentType; + ($r->{official_type}, $r->{media_type}) + = Whatpm::ContentType->get_sniffed_type + (get_file_head => sub { + return substr $r->{s}, 0, shift; + }, + http_content_type_byte => undef, + has_http_content_encoding => 0, + supported_image_types => {}); + } + + my $input_format = $http->get_parameter ('i'); + if (defined $input_format and length $input_format) { + $r->{media_type_overridden} + = (not defined $r->{media_type} or $input_format ne $r->{media_type}); + $r->{media_type} = $input_format; + } + if (defined $r->{s} and not defined $r->{media_type}) { + $r->{media_type} = 'text/html'; + $r->{media_type_overridden} = 1; + } + + if ($r->{media_type} eq 'text/xml') { + unless (defined $r->{charset}) { + $r->{charset} = 'us-ascii'; + $r->{official_charset} = $r->{charset}; + } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') { + $r->{charset_overridden} = 0; + } + } + + if (length $r->{s} > 1000_000) { + $r->{error_status_text} = 'Entity-body too large'; + delete $r->{s}; + return $r; + } + + $r->{inner_html_element} = $http->get_parameter ('e'); + + return $r; +} # get_input_document + +package WDCC::LWPUA; +BEGIN { push our @ISA, 'LWP::UserAgent'; } + +sub redirect_ok { + my $ua = shift; + unless ($ua->SUPER::redirect_ok (@_)) { + return 0; + } + + my $uris = $_[1]->header ('Location'); + return 0 unless $uris; + my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris); + unless ({ + http => 1, + }->{lc $uri->uri_scheme}) { + return 0; + } + unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) { + return 0; } - return join '/', @r; -} # get_node_path + return 1; +} # redirect_ok =head1 AUTHOR @@ -229,11 +536,11 @@ =head1 LICENSE -Copyright 2007 Wakaba +Copyright 2007-2008 Wakaba This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut -## $Date: 2007/06/27 11:08:03 $ +## $Date: 2008/07/21 05:24:32 $

Web Document Conformance Checker +(beta)

Tables

] . + get_node_link ($input, $table->{element}) . q[

$opt->{heading}

RDF Triples