/[suikacvs]/webroot/swe/lib/SWE/Object/Document.pm
Suika

Contents of /webroot/swe/lib/SWE/Object/Document.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (hide annotations) (download)
Mon Sep 21 07:09:48 2009 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.9: +47 -0 lines
++ swe/lib/SWE/Object/ChangeLog	21 Sep 2009 06:44:52 -0000
2009-09-21  Wakaba  <wakaba@suika.fam.cx>

	* Document.pm (title, get_or_create_graph_node): New methods (made
	from codes moved from main.pl).

++ swe/lib/suikawiki/ChangeLog	21 Sep 2009 07:09:35 -0000
2009-09-21  Wakaba  <wakaba@suika.fam.cx>

	* main.pl: Changed "related"/"unrelated" URLs to a |ping| end
	point.  Moved most of graph node end point processing to separate
	modules.

1 wakaba 1.1 package SWE::Object::Document;
2     use strict;
3     use warnings;
4 wakaba 1.7 use SWE::Lang qw/%ContentMediaType/;
5     use UNIVERSAL::require;
6 wakaba 1.1
7     sub new ($%) {
8     my $class = shift;
9     my $self = bless {@_}, $class;
10    
11     return $self;
12     }
13    
14 wakaba 1.3 sub new_id ($%) {
15     my $self = shift->new (@_);
16    
17 wakaba 1.4 ## NOTE: MUST be executed in names_lock.
18    
19 wakaba 1.3 my $idgen = $self->db->id;
20     my $id = $idgen->get_next_id;
21     $self->{id} = $id;
22    
23     return $self;
24     } # new_id
25    
26 wakaba 1.1 sub db { $_[0]->{db} }
27    
28     sub id { $_[0]->{id} }
29    
30 wakaba 1.7 sub reblessed : lvalue { $_[0]->{reblessed} }
31    
32     sub rebless ($) {
33     my $self = shift;
34     return if $self->reblessed;
35    
36 wakaba 1.9 my $id = $self->id;
37     my $module;
38     if (defined $id) {
39     my $ct = $self->content_media_type;
40     $module = $ContentMediaType{$ct}->{module} || $ContentMediaType{'text/x-suikawiki'}->{module};
41     } else {
42     $module = 'SWE::Object::Document::NotYet';
43     }
44 wakaba 1.7 $module->require or die $@;
45     bless $self, $module;
46    
47     $self->reblessed = 1;
48     }
49    
50 wakaba 1.10 ## ------ Metadata ------
51    
52 wakaba 1.8 sub prop ($) {
53     my $self = shift;
54     return $self->{prop} ||= $self->db->id_prop->get_data ($self->id);
55     } # prop
56    
57 wakaba 1.7 sub content_media_type ($) {
58 wakaba 1.8 my $self = shift;
59     return $self->prop->{'content-type'} // 'text/x-suikawiki';
60     } # content_media_type
61 wakaba 1.7
62 wakaba 1.1 sub associate_names ($$%) {
63     my ($self, $names, %args) = @_;
64    
65     ## NOTE: names_lock MUST be executed before the invocation.
66    
67     my $id = $self->id;
68     my $time = $args{time} || time;
69     my $sw3_pages = $self->{sw3_pages}; ## TODO: ...
70    
71     my $vc = $self->db->vc;
72    
73     my $name_prop_db = $self->{name_prop_db}; ## TODO: ...
74     local $name_prop_db->{version_control} = $vc;
75    
76     my $name_history_db = $self->db->name_history;
77     local $name_history_db->{version_control} = $vc;
78    
79     for my $name (keys %$names) {
80     my $name_props = $name_prop_db->get_data ($name);
81     unless (defined $name_props) {
82     my $sw3id = $sw3_pages->get_data ($name);
83     main::convert_sw3_page ($sw3id => $name); ## TODO: ...
84    
85     $name_props = $name_prop_db->get_data ($name);
86     unless (defined $name_props) {
87     $name_history_db->append_data ($name => [$time, 'c']);
88     }
89     }
90    
91     push @{$name_props->{id} ||= []}, $id;
92     $name_props->{name} = $name;
93     $name_prop_db->set_data ($name => $name_props);
94    
95     $name_history_db->append_data ($name => [$time, 'a', $id]);
96     }
97    
98     my $user = $args{user} || '(anon)';
99     $vc->commit_changes ("id=$id created by $user");
100     } # associate_names
101    
102 wakaba 1.10 sub title ($) {
103     my $self = shift;
104    
105     my $prop = $self->prop;
106    
107     my $title = $prop->{title};
108     return $title if defined $title and length $title;
109    
110     $title = [keys %{$prop->{name}}]->[0] // ''; ## XXXTODO: title-type
111     return $title;
112     } # title
113    
114     ## ------ Indexing and Graph ------
115    
116 wakaba 1.2 sub update_tfidf ($$) {
117     my ($self, $doc) = @_; ## TODO: $doc should not be an argument
118    
119     ## It is REQUIRED to lock the $id before the invocation of this
120     ## method to keep the consistency of tfidf data for the $id.
121    
122     my $id = $self->id;
123    
124     my $tfidf_db = $self->db->id_tfidf;
125    
126     require SWE::Data::FeatureVector;
127    
128     my $deleted_terms = SWE::Data::FeatureVector->parse_stringref
129     ($tfidf_db->get_data ($id))->as_key_hashref;
130    
131     my $tc = $doc->document_element->text_content;
132    
133     ## TODO: use element semantics...
134    
135     my $orig_tfs = {};
136     my $all_terms = 0;
137     main::for_unique_words ($tc => sub {
138     $orig_tfs->{$_[0]} = $_[1];
139     $all_terms += $_[1];
140     }); ## TODO: XXX
141    
142     my $names_index_db = $self->db->name_inverted_index;
143     $names_index_db->lock;
144    
145     my $idgen = $self->db->id;
146     my $doc_number = $idgen->get_last_id;
147    
148     my $terms = SWE::Data::FeatureVector->new;
149     for my $term (keys %$orig_tfs) {
150     my $n_tf = $orig_tfs->{$term} / $all_terms;
151    
152     my $df = $names_index_db->get_count ($term);
153     my $idf = log ($doc_number / ($df + 1));
154    
155     my $tfidf = $n_tf * $idf;
156    
157     $terms->set_tfidf ($term, $tfidf);
158     $names_index_db->add_data ($term => $id => $tfidf);
159    
160     delete $deleted_terms->{$term};
161     }
162    
163     for my $term (keys %$deleted_terms) {
164     $names_index_db->delete_data ($term, $id);
165     }
166    
167     $tfidf_db->set_data ($id => \( $terms->stringify ));
168     } # update_tfidf
169    
170 wakaba 1.10 sub get_or_create_graph_node ($) {
171     my $self = shift;
172    
173     my $db = $self->db;
174     my $doc_id = $self->id;
175    
176     my $id_prop = $db->id_prop->get_data ($doc_id);
177     return undef unless $id_prop;
178    
179     require SWE::Object::Graph;
180     my $graph = SWE::Object::Graph->new (db => $db);
181    
182     my $node;
183     my $node_id = $id_prop->{node_id};
184     if (defined $node_id) {
185     $node = $graph->get_node_by_id ($node_id);
186     } else {
187     $node = $graph->create_node ($doc_id);
188     $node_id = $node->id;
189    
190     $id_prop->{node_id} = $node_id;
191     $db->id_prop->set_data ($doc_id => $id_prop);
192     }
193    
194     return $node;
195     } # get_or_create_graph_node
196    
197     # ------ Locking ------
198    
199 wakaba 1.5 sub lock ($) {
200     my $self = shift;
201     my $lock = $self->{lock} ||= $self->{id_locks}->get_lock ($self->id); ## XXX
202     $self->{lock_n}++ or $self->lock;
203     } # lock
204    
205     sub unlock ($) {
206     my $self = shift;
207     my $lock = $self->{lock};
208     $self->{lock_n}--;
209     if ($lock and $self->{lock_n} <= 0) {
210     $lock->unlock;
211     delete $self->{lock};
212     delete $self->{lock_n};
213     }
214     } # unlock
215 wakaba 1.10
216     # ------ Format Convertion ------
217 wakaba 1.5
218 wakaba 1.8 sub to_text ($) {
219     my $self = shift;
220    
221     return $self->{content_db}->get_data ($self->id); # XXX
222     } # to_text
223    
224     sub to_text_media_type ($) {
225     my $self = shift;
226     return $self->content_media_type;
227     } # to_text_media_type
228    
229     sub to_xml_media_type ($) {
230     return undef;
231     } # to_xml_media_type
232    
233     sub to_xml ($) {
234     return undef;
235     } # to_xml
236    
237     sub to_html_fragment ($) {
238     return (undef, undef);
239     } # to_html_fragment
240 wakaba 1.6
241 wakaba 1.1 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24