| 1 |
wakaba |
1.1 |
#!/usr/bin/perl |
| 2 |
|
|
use strict; |
| 3 |
|
|
use utf8; |
| 4 |
|
|
|
| 5 |
wakaba |
1.1.2.1 |
use lib qw</home/wakaba/work/manakai/lib |
| 6 |
|
|
/home/httpd/html/www/markup/html/whatpm>; |
| 7 |
wakaba |
1.1 |
|
| 8 |
wakaba |
1.1.2.1 |
our $REPOSITORY_PATH = q</home/wakaba/pub/kkd/>; |
| 9 |
|
|
our $FEED_TAG = q<tag:2006-05-07,suika.fam.cx:norakuro/d/>; |
| 10 |
|
|
our $BASE_URI = q<http://suika.fam.cx/norakuro/d/>; |
| 11 |
wakaba |
1.1 |
our $BASE_LANG = 'ja'; |
| 12 |
wakaba |
1.1.2.1 |
our $AUTHOR_NAME = q<のらくろ>; |
| 13 |
|
|
our $AUTHOR_URI = undef; |
| 14 |
|
|
our $AUTHOR_MAIL = q<norakuro->.q<comment>.q<@>.q<suika>.q<.fam.cx>; |
| 15 |
|
|
|
| 16 |
|
|
use Message::DOM::DOMImplementation; |
| 17 |
|
|
use Whatpm::H2H; |
| 18 |
wakaba |
1.1 |
use Encode::EUCJP1997; |
| 19 |
|
|
|
| 20 |
|
|
my ($year, $month, $day) = @ARGV; |
| 21 |
|
|
$year += 0; |
| 22 |
|
|
$month += 0; |
| 23 |
|
|
$day += 0; |
| 24 |
|
|
|
| 25 |
|
|
my $atom = q<http://www.w3.org/2005/Atom>; |
| 26 |
|
|
my $fe = q<http://suika.fam.cx/www/2006/feature/>; |
| 27 |
|
|
my $cfg = q<http://suika.fam.cx/www/2006/dom-config/>; |
| 28 |
|
|
my $html = q<http://www.w3.org/1999/xhtml>; |
| 29 |
|
|
my $xml = q<http://www.w3.org/XML/1998/namespace>; |
| 30 |
|
|
my $xhtml2 = q<http://www.w3.org/2002/06/xhtml2/>; |
| 31 |
|
|
my $h2h = q<http://suika.fam.cx/~wakaba/archive/2005/manakai/Markup/H2H/>; |
| 32 |
|
|
my $xmlns = q<http://www.w3.org/2000/xmlns/>; |
| 33 |
|
|
|
| 34 |
wakaba |
1.1.2.1 |
my $impl = Message::DOM::DOMImplementation->new; |
| 35 |
wakaba |
1.1 |
|
| 36 |
|
|
my $base_file_name = $REPOSITORY_PATH.sprintf ('%04d/d%04d%02d%02d', |
| 37 |
|
|
$year, $year, $month, $day); |
| 38 |
|
|
|
| 39 |
|
|
my $h2h_data = ''; |
| 40 |
|
|
if (-f $base_file_name.'.hnf') { |
| 41 |
|
|
local $/ = undef; |
| 42 |
|
|
open my $h2h_file, '<', $base_file_name.'.hnf' |
| 43 |
|
|
or die "$0: $base_file_name.hnf: $!"; |
| 44 |
|
|
$h2h_data = Encode::decode ('euc-jp-1997', <$h2h_file>); |
| 45 |
|
|
close $h2h_file; |
| 46 |
|
|
} |
| 47 |
|
|
|
| 48 |
wakaba |
1.1.2.1 |
my $h2h_doc = Whatpm::H2H->parse_string ($h2h_data => $impl->create_document); |
| 49 |
wakaba |
1.1 |
|
| 50 |
|
|
my $i = 0; |
| 51 |
|
|
|
| 52 |
|
|
for my $section (@{$h2h_doc->get_elements_by_tag_name_ns ($html, 'body') |
| 53 |
|
|
->[0]->child_nodes}) { |
| 54 |
|
|
next if $section->node_type != $section->ELEMENT_NODE; |
| 55 |
|
|
next if $section->local_name != 'section'; |
| 56 |
|
|
next if $section->namespace_uri != $html; |
| 57 |
|
|
|
| 58 |
|
|
$i++; |
| 59 |
|
|
|
| 60 |
wakaba |
1.1.2.1 |
my $atom_doc = $impl->create_atom_entry_document |
| 61 |
wakaba |
1.1 |
($FEED_TAG.$year.':'.$month.':'.$day.':'.$i); |
| 62 |
|
|
$atom_doc->dom_config->set_parameter ($cfg.'create-child-element' => 1); |
| 63 |
|
|
|
| 64 |
|
|
my $atom_entry = $atom_doc->document_element; |
| 65 |
|
|
$atom_entry->set_attribute_ns ($xml, 'xml:base', $BASE_URI); |
| 66 |
|
|
$atom_entry->set_attribute_ns ($xml, 'xml:lang', $BASE_LANG); |
| 67 |
|
|
$atom_entry->set_attribute_ns ($xmlns, xmlns => $html); |
| 68 |
|
|
|
| 69 |
|
|
my $atom_content = $atom_entry->content_element; |
| 70 |
|
|
$atom_content->type ('xhtml'); |
| 71 |
|
|
|
| 72 |
|
|
my $atom_container = $atom_content->container; |
| 73 |
|
|
|
| 74 |
|
|
my $hour = 0; |
| 75 |
|
|
my $minute = 0; |
| 76 |
|
|
my $tz = '-00:00'; |
| 77 |
|
|
my @section_children = @{$section->child_nodes}; |
| 78 |
|
|
for my $el (@section_children) { |
| 79 |
|
|
next if $el->node_type != $el->ELEMENT_NODE; |
| 80 |
|
|
|
| 81 |
|
|
my $xuri = $el->manakai_expanded_uri; |
| 82 |
|
|
|
| 83 |
|
|
if ($xuri eq $xhtml2.'h') { |
| 84 |
|
|
my $atom_title = $atom_entry->title_element; |
| 85 |
|
|
$atom_title->type ('xhtml'); |
| 86 |
|
|
my $atom_title_container = $atom_title->container; |
| 87 |
|
|
my @el_children = @{$el->child_nodes}; |
| 88 |
|
|
for (@el_children) { |
| 89 |
|
|
$atom_title_container->append_child ($atom_doc->adopt_node ($_)); |
| 90 |
|
|
} |
| 91 |
|
|
|
| 92 |
|
|
if ($atom_title_container->text_content |
| 93 |
|
|
=~ /\(\@(\d\d):(\d\d) ([+-]\d\d:\d\d)\)\s*$/) { |
| 94 |
|
|
$hour = $1 + 0; |
| 95 |
|
|
$minute = $2 + 0; |
| 96 |
|
|
$tz = $3; |
| 97 |
|
|
} |
| 98 |
|
|
} elsif ($xuri eq $h2h.'cat') { |
| 99 |
|
|
## TODO: |
| 100 |
|
|
} else { |
| 101 |
|
|
$atom_container->append_child ($atom_doc->adopt_node ($el)); |
| 102 |
|
|
} |
| 103 |
|
|
} |
| 104 |
|
|
|
| 105 |
|
|
$atom_entry->published_element->text_content |
| 106 |
|
|
(sprintf '%04d-%02d-%02dT%02d:%02d:00%s', |
| 107 |
|
|
$year, $month, $day, $hour, $minute, $tz); |
| 108 |
|
|
|
| 109 |
|
|
for my $author_el ($atom_entry->append_child |
| 110 |
|
|
($atom_doc->create_element_ns ($atom, 'author'))) { |
| 111 |
|
|
$author_el->name ($AUTHOR_NAME); |
| 112 |
|
|
$author_el->uri ($AUTHOR_URI) if defined $AUTHOR_URI; |
| 113 |
|
|
$author_el->email ($AUTHOR_MAIL) if defined $AUTHOR_MAIL; |
| 114 |
|
|
} |
| 115 |
|
|
|
| 116 |
|
|
for my $link_el ($atom_entry->append_child |
| 117 |
|
|
($atom_doc->create_element_ns ($atom, 'link'))) { |
| 118 |
|
|
$link_el->rel ('alternate'); |
| 119 |
|
|
$link_el->href (sprintf 'd%04d%02d.%s.html#d%d-%d', |
| 120 |
|
|
$year, $month, $BASE_LANG, $day, $i); |
| 121 |
|
|
$link_el->type ('text/html'); |
| 122 |
|
|
$link_el->hreflang ($BASE_LANG); |
| 123 |
|
|
} |
| 124 |
|
|
|
| 125 |
|
|
for my $link_el ($atom_entry->append_child |
| 126 |
|
|
($atom_doc->create_element_ns ($atom, 'link'))) { |
| 127 |
|
|
$link_el->rel ('self'); |
| 128 |
|
|
$link_el->href (sprintf '%04d/d%04d%02d%02d-%d.%s.atom', |
| 129 |
|
|
$year, $year, $month, $day, $i, $BASE_LANG); |
| 130 |
|
|
$link_el->type ('application/atom+xml'); |
| 131 |
|
|
$link_el->hreflang ($BASE_LANG); |
| 132 |
|
|
} |
| 133 |
|
|
|
| 134 |
|
|
my $entry_file_name = $base_file_name . '-' . $i . '.'.$BASE_LANG.'.atom'; |
| 135 |
|
|
|
| 136 |
|
|
open my $entry_file, '>', $entry_file_name |
| 137 |
|
|
or die "$0: $entry_file_name: $!"; |
| 138 |
|
|
|
| 139 |
|
|
warn qq<Write to "$entry_file_name"\n>; |
| 140 |
wakaba |
1.1.2.1 |
print $entry_file Encode::encode ('utf-8', $atom_doc->inner_html); |
| 141 |
wakaba |
1.1 |
close $entry_file; |
| 142 |
|
|
system 'chmod', 'go+r', $entry_file_name; |
| 143 |
|
|
$? == -1 and die "$0: chmod $entry_file_name: $!"; |
| 144 |
|
|
} |
| 145 |
|
|
|
| 146 |
|
|
|