/[suikacvs]/webroot/www/harusame/bin/harusame.pl
Suika

Contents of /webroot/www/harusame/bin/harusame.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations) (download)
Tue Oct 21 10:14:26 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +75 -2 lines
File MIME type: text/plain
++ harusame/bin/ChangeLog	21 Oct 2008 09:03:25 -0000
	* harusame.pl: Documentation added.  Use |require| instead of
	|use| so that |--help| option doesn't require manakai module.

	* Makefile: Rule to make |harusame.html| is added.

2008-10-21  Wakaba  <wakaba@suika.fam.cx>

++ ChangeLog	21 Oct 2008 10:14:06 -0000
	* readme.html.src: New file.

	* Makefile: New file.

2008-10-21  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3
4 use Getopt::Long;
5 use Pod::Usage;
6
7 my $lang;
8
9 GetOptions (
10 'lang=s' => \$lang,
11 'help' => sub {
12 pod2usage (-exitval => 0, -verbose => 2);
13 },
14 ) or pod2usage (-exitval => 1, -verbose => 1);
15
16 pod2usage (-exitval => 1, -verbose => 1,
17 -msg => "Required argument --lang is not specified.\n")
18 unless defined $lang;
19 $lang =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
20
21 require Message::DOM::DOMImplementation;
22 my $dom = Message::DOM::DOMImplementation->new;
23
24 my $doc = $dom->create_document;
25 $doc->manakai_is_html (1);
26
27 {
28 binmode STDIN, ':encoding(utf8)';
29 local $/ = undef;
30 $doc->inner_html (scalar <STDIN>);
31 }
32
33 my @remove;
34
35 my $containers = $doc->query_selector_all
36 ('[data-lang-container], [data-lang-content]');
37 for my $el (@$containers) {
38 next unless $el->parent_node;
39
40 if ($el->has_attribute ('data-lang-container')) {
41 my $content = get_content_element ($el);
42 next unless $content;
43 if ($el->get_attribute ('data-lang-container') eq 'replace') {
44 $el->parent_node->replace_child ($content, $el);
45 } else {
46 $el->text_content ('');
47 $el->append_child ($content);
48 }
49 } elsif ($el->has_attribute ('data-lang-content')) {
50 my $idref = $el->get_attribute ('data-lang-content');
51 my $container = $doc->get_element_by_id ($idref);
52 unless ($container) {
53 warn "Element with ID $idref not found\n";
54 next;
55 }
56
57 my $content = get_content_element ($container);
58 $el->text_content ($content->text_content);
59 $el->manakai_html_language ($content->manakai_html_language);
60
61 if ($container->has_attribute ('data-lang-declaration')) {
62 push @remove, $container;
63 }
64 }
65 }
66
67 for (@remove) {
68 next unless $_->parent_node;
69 $_->parent_node->remove_child ($_);
70 }
71
72 $doc->document_element->manakai_html_language ($lang);
73
74 binmode STDOUT, ':encoding(utf8)';
75 print STDOUT $doc->inner_html;
76
77 sub get_content_element ($) {
78 my $container = shift;
79
80 my $c_el;
81 for my $e (@{$container->child_nodes}) {
82 next unless $e->node_type == $e->ELEMENT_NODE;
83 my $e_lang = $e->manakai_html_language;
84 $e_lang =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
85 if ($e_lang eq $lang) {
86 $c_el = $e;
87 last;
88 } else {
89 $c_el ||= $e;
90 }
91 }
92
93 return $c_el;
94 } # get_content_element
95
96 __END__
97
98 =head1 NAME
99
100 harusame.pl - Multilingual Web page management tool
101
102 =head1 SYNOPSIS
103
104 perl harusame.pl --lang LANGCODE < input.html > output.html
105
106 perl harusame.pl --help
107
108 =head1 DESCRIPTION
109
110 The C<harusame.pl> script extracts a version of the HTML document
111 written in the specified natural language, from a source HTML document
112 that contains paragraphs in multiple natural languages.
113
114 The document management of a multilingual Web site where there are
115 multiple versions of a (conceptually same) document is somewhat
116 difficult in general. If the author of an HTML document wants to edit
117 a part of the document, then he or she has to ensure not to forget
118 updating translations at the same time, otherwise documents in
119 different language versions also differ in their content versions.
120
121 Using the C<harusame.pl>, one can generate versions of an HTML
122 document in different language from one source HTML document that
123 contains paragraphs written in all of those languages, such that
124 authors no longer have to manage different content versions and
125 different language versions in separate files.
126
127 =head1 ARGUMENTS
128
129 The source document must be provided to the script using the
130 I<standard input>. It must be encoded in UTF-8.
131
132 The script outputs the generated document encoded in UTF-8 to the
133 I<standard output>.
134
135 Following command-line options are available to this script:
136
137 =over 4
138
139 =item C<--help>
140
141 Show the help message and exit.
142
143 =item C<--lang I<LANGCODE>> (B<REQUIRED>)
144
145 The language of the version to generate. This option must be
146 specified. The value must be a value that is valid for HTML
147 C<lang=""> attribute.
148
149 =back
150
151 =head1 SEE ALSO
152
153 Readme L<http://suika.fam.cx/www/harusame/readme>. How to mark up the
154 source HTML document is described in this document.
155
156 =head1 AUTHOR
157
158 Wakaba <w@suika.fam.cx>.
159
160 =head1 LICENSE
161
162 Copyright 2008 Wakaba <w@suika.fam.cx>.
163
164 This library is free software; you can redistribute it and/or modify
165 it under the same terms as Perl itself.
166
167 =cut
168
169 ## $Date: 2008/10/21 08:36:59 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24