/[suikacvs]/webroot/swe/lib/SWE/Object/Document.pm
Suika

Contents of /webroot/swe/lib/SWE/Object/Document.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (hide annotations) (download)
Mon Sep 14 02:03:25 2009 UTC (15 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.5: +56 -0 lines
++ swe/lib/SWE/Object/ChangeLog	14 Sep 2009 02:03:14 -0000
	* Document.pm (to_html_fragment): New method.

2009-09-14  Wakaba  <wakaba@suika.fam.cx>

++ swe/lib/suikawiki/ChangeLog	14 Sep 2009 02:03:01 -0000
	* main.pl: Moved format=html processing to external module.

2009-09-14  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package SWE::Object::Document;
2     use strict;
3     use warnings;
4    
5     sub new ($%) {
6     my $class = shift;
7     my $self = bless {@_}, $class;
8    
9     return $self;
10     }
11    
12 wakaba 1.3 sub new_id ($%) {
13     my $self = shift->new (@_);
14    
15 wakaba 1.4 ## NOTE: MUST be executed in names_lock.
16    
17 wakaba 1.3 my $idgen = $self->db->id;
18     my $id = $idgen->get_next_id;
19     $self->{id} = $id;
20    
21     return $self;
22     } # new_id
23    
24 wakaba 1.1 sub db { $_[0]->{db} }
25    
26     sub id { $_[0]->{id} }
27    
28     sub associate_names ($$%) {
29     my ($self, $names, %args) = @_;
30    
31     ## NOTE: names_lock MUST be executed before the invocation.
32    
33     my $id = $self->id;
34     my $time = $args{time} || time;
35     my $sw3_pages = $self->{sw3_pages}; ## TODO: ...
36    
37     my $vc = $self->db->vc;
38    
39     my $name_prop_db = $self->{name_prop_db}; ## TODO: ...
40     local $name_prop_db->{version_control} = $vc;
41    
42     my $name_history_db = $self->db->name_history;
43     local $name_history_db->{version_control} = $vc;
44    
45     for my $name (keys %$names) {
46     my $name_props = $name_prop_db->get_data ($name);
47     unless (defined $name_props) {
48     my $sw3id = $sw3_pages->get_data ($name);
49     main::convert_sw3_page ($sw3id => $name); ## TODO: ...
50    
51     $name_props = $name_prop_db->get_data ($name);
52     unless (defined $name_props) {
53     $name_history_db->append_data ($name => [$time, 'c']);
54     }
55     }
56    
57     push @{$name_props->{id} ||= []}, $id;
58     $name_props->{name} = $name;
59     $name_prop_db->set_data ($name => $name_props);
60    
61     $name_history_db->append_data ($name => [$time, 'a', $id]);
62     }
63    
64     my $user = $args{user} || '(anon)';
65     $vc->commit_changes ("id=$id created by $user");
66     } # associate_names
67    
68 wakaba 1.2 sub update_tfidf ($$) {
69     my ($self, $doc) = @_; ## TODO: $doc should not be an argument
70    
71     ## It is REQUIRED to lock the $id before the invocation of this
72     ## method to keep the consistency of tfidf data for the $id.
73    
74     my $id = $self->id;
75    
76     my $tfidf_db = $self->db->id_tfidf;
77    
78     require SWE::Data::FeatureVector;
79    
80     my $deleted_terms = SWE::Data::FeatureVector->parse_stringref
81     ($tfidf_db->get_data ($id))->as_key_hashref;
82    
83     my $tc = $doc->document_element->text_content;
84    
85     ## TODO: use element semantics...
86    
87     my $orig_tfs = {};
88     my $all_terms = 0;
89     main::for_unique_words ($tc => sub {
90     $orig_tfs->{$_[0]} = $_[1];
91     $all_terms += $_[1];
92     }); ## TODO: XXX
93    
94     my $names_index_db = $self->db->name_inverted_index;
95     $names_index_db->lock;
96    
97     my $idgen = $self->db->id;
98     my $doc_number = $idgen->get_last_id;
99    
100     my $terms = SWE::Data::FeatureVector->new;
101     for my $term (keys %$orig_tfs) {
102     my $n_tf = $orig_tfs->{$term} / $all_terms;
103    
104     my $df = $names_index_db->get_count ($term);
105     my $idf = log ($doc_number / ($df + 1));
106    
107     my $tfidf = $n_tf * $idf;
108    
109     $terms->set_tfidf ($term, $tfidf);
110     $names_index_db->add_data ($term => $id => $tfidf);
111    
112     delete $deleted_terms->{$term};
113     }
114    
115     for my $term (keys %$deleted_terms) {
116     $names_index_db->delete_data ($term, $id);
117     }
118    
119     $tfidf_db->set_data ($id => \( $terms->stringify ));
120     } # update_tfidf
121    
122 wakaba 1.5 sub to_text ($) {
123     my $self = shift;
124    
125     return $self->{content_db}->get_data ($self->id); # XXX
126     } # to_text
127    
128     sub to_text_media_type ($) {
129     my $self = shift;
130     my $id_prop = $self->{id_prop_db}->get_data ($self->id); ## XXX
131     return $id_prop->{'content-type'} // 'text/x-suikawiki';
132     } # to_text_media_type
133    
134     sub lock ($) {
135     my $self = shift;
136     my $lock = $self->{lock} ||= $self->{id_locks}->get_lock ($self->id); ## XXX
137     $self->{lock_n}++ or $self->lock;
138     } # lock
139    
140     sub unlock ($) {
141     my $self = shift;
142     my $lock = $self->{lock};
143     $self->{lock_n}--;
144     if ($lock and $self->{lock_n} <= 0) {
145     $lock->unlock;
146     delete $self->{lock};
147     delete $self->{lock_n};
148     }
149     } # unlock
150    
151     sub to_xml ($;%) {
152     my ($self, %args) = @_;
153    
154     my $id = $self->id;
155    
156     $self->lock;
157    
158     my $id_prop = $self->{id_prop_db}->get_data ($id); ## XXX
159     my $cache_prop = $self->{cache_prop_db}->get_data ($id); ## XXX
160     my $doc = $self->{swml_to_xml}->($id, $id_prop, $cache_prop); ## XXX
161    
162     $self->unlock;
163    
164     if ($args{styled}) {
165     my $pi = $doc->create_processing_instruction
166     ('xml-stylesheet', 'href="http://suika.fam.cx/www/style/swml/structure"');
167     $doc->insert_before ($pi, $doc->first_child);
168     }
169    
170     return $doc;
171     }
172    
173     sub to_xml_media_type ($) {
174     return 'application/xml';
175     }
176    
177 wakaba 1.6 sub to_html_fragment ($) {
178     my $self = shift;
179    
180     my ($html_doc, $html_container);
181    
182     $self->lock;
183    
184     require SWE::Lang::XML2HTML;
185     my $html_converter_version = $SWE::Lang::XML2HTML::ConverterVersion;
186    
187     my $id = $self->id;
188     my $id_prop = $self->{id_prop_db}->get_data ($id); # XXX
189     my $cache_prop = $self->{cache_prop_db}->get_data ($id); # XXX
190    
191     my $html_cache_version = $cache_prop->{'html-cache-version'};
192     if (defined $html_cache_version and
193     $html_cache_version >= $html_converter_version) {
194     my $html_cached_hash = $cache_prop->{'html-cached-hash'} // 'x';
195     my $current_hash = $id_prop->{hash} // '';
196     if ($html_cached_hash eq $current_hash) {
197     $html_doc = $self->db->id_html_cache->get_data ($id);
198     }
199     }
200    
201     unless ($html_doc) {
202     my $xml_doc = $self->to_xml;
203     if ($xml_doc) {
204     require Message::DOM::DOMImplementation;
205     my $dom = Message::DOM::DOMImplementation->new;
206     $html_doc = $dom->create_document;
207     $html_doc->strict_error_checking (0);
208     $html_doc->dom_config->set_parameter
209     ('http://suika.fam.cx/www/2006/dom-config/strict-document-children' => 0);
210     $html_doc->manakai_is_html (1);
211    
212     $html_container = SWE::Lang::XML2HTML->convert
213     ($self->{name}, $xml_doc => $html_doc, $self->{get_page_url}, 2); # XXX
214    
215     $self->db->id_html_cache->set_data ($id => $html_container);
216     $cache_prop->{'html-cached-hash'} = $id_prop->{hash};
217     $cache_prop->{'html-cache-version'} = $html_converter_version;
218     $self->{cache_prop_db}->set_data ($id => $cache_prop); # XXX
219     }
220     } else {
221     $html_doc->manakai_is_html (1);
222     $html_container = $html_doc->create_document_fragment;
223     while (@{$html_doc->child_nodes}) {
224     $html_container->append_child ($html_doc->first_child);
225     }
226     }
227    
228     $self->unlock;
229    
230     return ($html_doc, $html_container);
231     }
232    
233 wakaba 1.1 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24