/[suikacvs]/webroot/swe/lib/SWE/Object/Document.pm
Suika

Contents of /webroot/swe/lib/SWE/Object/Document.pm

Parent Directory | Revision Log


Revision 1.3 - (hide annotations) (download)
Sun Jul 12 10:46:18 2009 UTC (15 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +10 -0 lines
++ swe/lib/SWE/Object/ChangeLog	12 Jul 2009 10:46:07 -0000
	* Document.pm (new_id): New method.

2009-07-12  Wakaba  <wakaba@suika.fam.cx>

++ swe/lib/suikawiki/ChangeLog	12 Jul 2009 10:45:55 -0000
	* main.pl: Moved the code to obtain the new ID to another module.

2009-07-12  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package SWE::Object::Document;
2     use strict;
3     use warnings;
4    
## Constructs a new document object.
##
## The first argument is the class name.  The remaining arguments are
## taken as a flat list of key/value pairs and become the fields of
## the blessed hash; the |db| and |id| accessors of this class read
## the fields of those names.
##
## Returns the new object.
sub new ($%) {
  my ($class, @fields) = @_;

  return bless {@fields}, $class;
} # new
11    
## Constructs a new document object, exactly as |new| does with the
## same arguments, and then assigns it a freshly generated ID.
##
## The ID is obtained by calling |get_next_id| on the object returned
## by the |id| method of the document's |db|, so a |db| argument is
## needed for this method to work.
##
## Returns the new object, whose |id| field holds the new ID.
sub new_id ($%) {
  my $class = shift;
  my $doc = $class->new (@_);

  ## Ask the ID generator of the database for the next unused ID.
  $doc->{id} = $doc->db->id->get_next_id;

  return $doc;
} # new_id
21    
## Returns the value of the |db| field of the object.
sub db {
  my $self = shift;
  return $self->{db};
} # db
23    
## Returns the value of the |id| field of the object.
sub id {
  my $self = shift;
  return $self->{id};
} # id
25    
## Associates the document with every name that is a key of the hash
## reference |$names| and commits the changes to the version control
## object of the database.
##
## Arguments:
##   $names      - Hash reference; only its keys are used.
##   time => $t  - Timestamp recorded in the name history entries.
##                 Defaults to the current time if false.
##   user => $u  - User name put in the commit message.  Defaults to
##                 |(anon)| if false.
##
## For each name the ID of the document is appended to the |id| list
## of the name's properties, and an |a| entry carrying the ID is
## appended to the name's history.  If the name has no properties
## even after the attempt to convert the SuikaWiki 3 page, a |c| entry
## is appended first.
##
## NOTE: names_lock MUST be executed before the invocation.
sub associate_names ($$%) {
  my $self = shift;
  my $names = shift;
  my %args = @_;

  my $doc_id = $self->id;
  my $timestamp = $args{time} || time;
  my $sw3_page_db = $self->{sw3_pages}; ## TODO: ...

  my $version_control = $self->db->vc;

  ## Both databases write through the same version control object,
  ## but only for the duration of this method call.
  my $prop_db = $self->{name_prop_db}; ## TODO: ...
  local $prop_db->{version_control} = $version_control;

  my $history_db = $self->db->name_history;
  local $history_db->{version_control} = $version_control;

  for my $name (keys %$names) {
    my $props = $prop_db->get_data ($name);

    if (not defined $props) {
      ## The name might still exist only as a SuikaWiki 3 page;
      ## try to convert it and then read the properties again.
      my $sw3_page_id = $sw3_page_db->get_data ($name);
      main::convert_sw3_page ($sw3_page_id => $name); ## TODO: ...

      $props = $prop_db->get_data ($name);

      ## Still nothing: record a |c| entry for the name (presumably
      ## "created" - the meaning is not defined in this file).
      $history_db->append_data ($name => [$timestamp, 'c'])
          if not defined $props;
    }

    ## |$props| is autovivified here if the name had no properties.
    $props->{id} ||= [];
    push @{$props->{id}}, $doc_id;
    $props->{name} = $name;
    $prop_db->set_data ($name => $props);

    $history_db->append_data ($name => [$timestamp, 'a', $doc_id]);
  }

  my $user_name = $args{user} || '(anon)';
  $version_control->commit_changes ("id=$doc_id created by $user_name");
} # associate_names
65    
## Recomputes the tf-idf feature vector of the document from the text
## content of |$doc| and updates both the per-ID tfidf database and
## the name inverted index accordingly.
##
## Arguments:
##   $doc - Object whose |document_element->text_content| gives the
##          text to be analyzed.
##
## For each unique word reported by |main::for_unique_words|, the
## weight is (count / total count) * log (last ID / (df + 1)), where
## df is the count of the term in the inverted index.  Terms that are
## in the previously stored vector but not in the new text are
## deleted from the inverted index.
##
## It is REQUIRED to lock the $id before the invocation of this
## method to keep the consistency of tfidf data for the $id.
sub update_tfidf ($$) {
  my $self = shift;
  my $doc = shift; ## TODO: $doc should not be an argument

  my $doc_id = $self->id;
  my $tfidf_db = $self->db->id_tfidf;

  require SWE::Data::FeatureVector;

  ## Start with all the terms of the stored vector; whatever is left
  ## after the new terms have been processed has gone from the text.
  my $stale_terms = SWE::Data::FeatureVector->parse_stringref
      ($tfidf_db->get_data ($doc_id))
      ->as_key_hashref;

  my $text = $doc->document_element->text_content;

  ## TODO: use element semantics...

  my %count_of;
  my $total_count = 0;
  main::for_unique_words ($text => sub {
    my ($word, $count) = @_;
    $count_of{$word} = $count;
    $total_count += $count;
  }); ## TODO: XXX

  ## NOTE: The lock is not released within this method.
  ## TODO: confirm where (or whether) it is released.
  my $index_db = $self->db->name_inverted_index;
  $index_db->lock;

  ## The last issued ID is used as the number of documents.
  my $id_generator = $self->db->id;
  my $doc_count = $id_generator->get_last_id;

  my $vector = SWE::Data::FeatureVector->new;
  for my $term (keys %count_of) {
    my $term_frequency = $count_of{$term} / $total_count;

    my $doc_frequency = $index_db->get_count ($term);
    my $inverse_doc_frequency = log ($doc_count / ($doc_frequency + 1));

    my $weight = $term_frequency * $inverse_doc_frequency;

    $vector->set_tfidf ($term, $weight);
    $index_db->add_data ($term => $doc_id => $weight);

    ## The term is still in use.
    delete $stale_terms->{$term};
  }

  for my $gone_term (keys %$stale_terms) {
    $index_db->delete_data ($gone_term, $doc_id);
  }

  $tfidf_db->set_data ($doc_id => \( $vector->stringify ));
} # update_tfidf
119    
120 wakaba 1.1 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24