/[suikacvs]/webroot/www/harusame/bin/harusame.pl
Suika

Contents of /webroot/www/harusame/bin/harusame.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Tue Oct 21 10:14:26 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +75 -2 lines
File MIME type: text/plain
++ harusame/bin/ChangeLog	21 Oct 2008 09:03:25 -0000
	* harusame.pl: Documentation added.  Use |require| instead of
	|use| so that |--help| option doesn't require manakai module.

	* Makefile: Rule to make |harusame.html| is added.

2008-10-21  Wakaba  <wakaba@suika.fam.cx>

++ ChangeLog	21 Oct 2008 10:14:06 -0000
	* readme.html.src: New file.

	* Makefile: New file.

2008-10-21  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4     use Getopt::Long;
5     use Pod::Usage;
6    
## Target language of the version to generate, taken from |--lang|.
my $lang;

## Parse the command line.  |--help| prints the full manual and exits
## successfully; an option parsing failure prints the usage and exits
## with status 1.
unless (GetOptions (
  'help' => sub { pod2usage (-exitval => 0, -verbose => 2) },
  'lang=s' => \$lang,
)) {
  pod2usage (-exitval => 1, -verbose => 1);
}

## |--lang| is mandatory.
if (not defined $lang) {
  pod2usage (
    -msg => "Required argument --lang is not specified.\n",
    -exitval => 1,
    -verbose => 1,
  );
}

## Language codes are compared ASCII case-insensitively, so normalize
## the requested one to lowercase once here.
$lang =~ tr/A-Z/a-z/;
20    
## Loaded with |require| (at run time) rather than |use| so that the
## |--help| option works even when the manakai module is not installed.
require Message::DOM::DOMImplementation;
my $dom = Message::DOM::DOMImplementation->new;

## Create an empty document and flag it as an HTML document.
my $doc = $dom->create_document;
$doc->manakai_is_html (1);

## Read the whole standard input, decoded as UTF-8, and use it as the
## markup of the document.
binmode STDIN, ':encoding(utf8)';
my $source = do { local $/ = undef; <STDIN> };
$doc->inner_html ($source);
32    
## Containers marked with |data-lang-declaration| whose content has been
## copied into a referencing element; they are detached from the document
## after the main pass below.
my @remove;

## Process every element that carries one of the two marker attributes.
my $containers = $doc->query_selector_all
    ('[data-lang-container], [data-lang-content]');
for my $el (@$containers) {
  ## Skip elements that have no parent node (e.g. ones detached while
  ## processing an earlier element).
  next unless $el->parent_node;

  if ($el->has_attribute ('data-lang-container')) {
    ## The element holds one child element per language: keep only the
    ## selected one.
    my $content = get_content_element ($el);
    next unless $content;
    if ($el->get_attribute ('data-lang-container') eq 'replace') {
      ## "replace" mode: the selected child takes the place of the
      ## container itself.
      $el->parent_node->replace_child ($content, $el);
    } else {
      ## Otherwise the container is kept, with the selected child as
      ## its only content.
      $el->text_content ('');
      $el->append_child ($content);
    }
  } elsif ($el->has_attribute ('data-lang-content')) {
    ## The element refers, by ID, to a container elsewhere in the
    ## document whose selected text is copied into it.
    my $idref = $el->get_attribute ('data-lang-content');
    my $container = $doc->get_element_by_id ($idref);
    unless ($container) {
      warn "Element with ID $idref not found\n";
      next;
    }

    ## |get_content_element| returns nothing when the container has no
    ## element children; without this check the method calls below would
    ## die on an undefined value.
    my $content = get_content_element ($container);
    unless ($content) {
      warn "Element with ID $idref has no content element\n";
      next;
    }

    $el->text_content ($content->text_content);
    $el->manakai_html_language ($content->manakai_html_language);

    if ($container->has_attribute ('data-lang-declaration')) {
      push @remove, $container;
    }
  }
}

## Detach the declaration containers collected above; one that has
## already lost its parent is left alone.
for my $declaration (@remove) {
  next unless $declaration->parent_node;
  $declaration->parent_node->remove_child ($declaration);
}

## Label the generated document with the requested language.
$doc->document_element->manakai_html_language ($lang);

## Serialize the result to the standard output as UTF-8.
binmode STDOUT, ':encoding(utf8)';
print STDOUT $doc->inner_html;
76    
## Selects the child element of |$container| to be used for the
## requested language (the file-level |$lang|).
##
## Returns the first child element whose HTML language equals |$lang|
## (ASCII case-insensitively).  If there is no such child, the first
## child element is returned instead.  If the container has no child
## element at all, the undefined value is returned.
sub get_content_element ($) {
  my ($container) = @_;

  my $fallback;
  for my $child (@{$container->child_nodes}) {
    ## Only element children are candidates.
    next if $child->node_type != $child->ELEMENT_NODE;

    my $child_lang = $child->manakai_html_language;
    $child_lang =~ tr/A-Z/a-z/; ## ASCII case-insensitive.

    ## An exact match wins immediately.
    return $child if $child_lang eq $lang;

    ## Otherwise remember the first element seen as the fallback.
    $fallback ||= $child;
  }

  return $fallback;
} # get_content_element
95    
96 wakaba 1.2 __END__
97    
98     =head1 NAME
99    
100     harusame.pl - Multilingual Web page management tool
101    
102     =head1 SYNOPSIS
103    
104     perl harusame.pl --lang LANGCODE < input.html > output.html
105    
106     perl harusame.pl --help
107    
108     =head1 DESCRIPTION
109    
110     The C<harusame.pl> script extracts a version of the HTML document
111     written in the specified natural language, from a source HTML document
112     that contains paragraphs in multiple natural languages.
113    
114     Managing the documents of a multilingual Web site, where there are
115     multiple versions of a (conceptually same) document, is somewhat
116     difficult in general. If the author of an HTML document wants to edit
117     a part of the document, then he or she has to remember to update the
118     translations at the same time; otherwise the different language
119     versions of the document will also differ in their content versions.
120    
121     Using C<harusame.pl>, one can generate versions of an HTML
122     document in different languages from one source HTML document that
123     contains paragraphs written in all of those languages, such that
124     authors no longer have to manage different content versions and
125     different language versions in separate files.
126    
127     =head1 ARGUMENTS
128    
129     The source document must be provided to the script using the
130     I<standard input>. It must be encoded in UTF-8.
131    
132     The script outputs the generated document encoded in UTF-8 to the
133     I<standard output>.
134    
135     The following command-line options are available to this script:
136    
137     =over 4
138    
139     =item C<--help>
140    
141     Show the help message and exit.
142    
143     =item C<--lang I<LANGCODE>> (B<REQUIRED>)
144    
145     The language of the version to generate. This option must be
146     specified. The value must be a value that is valid for the HTML
147     C<lang=""> attribute.
148    
149     =back
150    
151     =head1 SEE ALSO
152    
153     Readme L<http://suika.fam.cx/www/harusame/readme>. How to mark up the
154     source HTML document is described in this document.
155    
156     =head1 AUTHOR
157    
158     Wakaba <w@suika.fam.cx>.
159    
160     =head1 LICENSE
161    
162     Copyright 2008 Wakaba <w@suika.fam.cx>.
163    
164     This library is free software; you can redistribute it and/or modify
165     it under the same terms as Perl itself.
166    
167     =cut
168    
169     ## $Date: 2008/10/21 08:36:59 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24