1 |
package SWE::Object::Graph; |
2 |
use strict; |
3 |
use warnings; |
4 |
|
5 |
sub new ($%) { |
6 |
my $class = shift; |
7 |
my $self = bless {@_}, $class; |
8 |
|
9 |
return $self; |
10 |
} |
11 |
|
12 |
## ------ Database ------ |
13 |
|
14 |
sub db ($) { $_[0]->{db} } |
15 |
|
16 |
sub repo ($) { $_[0]->{repo} } |
17 |
|
18 |
sub lock () { |
19 |
my $self = shift; |
20 |
|
21 |
if ($self->{lock_n}++ == 0) { |
22 |
my $lock = $self->{lock} ||= do { |
23 |
require SWE::DB::Lock; |
24 |
my $lock = SWE::DB::Lock->new; |
25 |
$lock->{file_name} = $self->db->graph_dir_name . 'graph.lock'; |
26 |
$lock->lock_type ('Graph'); |
27 |
$lock; |
28 |
}; |
29 |
|
30 |
$lock->lock; |
31 |
}; |
32 |
} # lock |
33 |
|
34 |
sub unlock () { |
35 |
my $self = shift; |
36 |
|
37 |
if (--$self->{lock_n} <= 0 and $self->{lock}) { |
38 |
$self->{lock}->unlock; |
39 |
} |
40 |
} # unlock |
41 |
|
42 |
use constant EMPTY_NODE_RATIO => 0.2; |
43 |
use constant INITIAL_DEGREE => 5; |
44 |
|
45 |
sub add_nodes ($$) { |
46 |
my ($self, $new_doc_number) = @_; |
47 |
|
48 |
$self->lock; |
49 |
|
50 |
my $global_prop_db = $self->db->global_prop; |
51 |
|
52 |
my $last_node_index = ${$global_prop_db->get_data ('lastnodeindex') || \ 0}; |
53 |
my $doc_on_node_number = ${$global_prop_db->get_data ('doconnodenumber') || \ 0}; |
54 |
$doc_on_node_number += $new_doc_number; |
55 |
my $max_node_index = $doc_on_node_number / (1 - EMPTY_NODE_RATIO); |
56 |
$max_node_index = $last_node_index if $max_node_index < $last_node_index; |
57 |
$max_node_index = $last_node_index + $new_doc_number |
58 |
if $max_node_index < $last_node_index + $new_doc_number; |
59 |
$max_node_index = int $max_node_index; |
60 |
|
61 |
if ($last_node_index < $max_node_index) { |
62 |
my $new_edges = {}; |
63 |
|
64 |
for my $index1 ($last_node_index + 1 .. $max_node_index) { |
65 |
for (1 .. INITIAL_DEGREE) { |
66 |
#my $index2 = int rand $index1; |
67 |
my $index2 = $index1 - rand 50; # XXX |
68 |
|
69 |
$new_edges->{$index1}->{$index2} = 1; |
70 |
$new_edges->{$index2}->{$index1} = 1; |
71 |
} |
72 |
} |
73 |
|
74 |
my $graph_prop_db = $self->db->graph_prop; |
75 |
for my $index1 (keys %$new_edges) { |
76 |
my $node = $graph_prop_db->get_data ($index1); |
77 |
my $edges = $node->{neighbors} ||= {}; |
78 |
for my $index2 (keys %{$new_edges->{$index1}}) { |
79 |
$edges->{$index2} = 1; |
80 |
} |
81 |
$graph_prop_db->set_data ($index1 => $node); |
82 |
} |
83 |
|
84 |
$global_prop_db->set_data (lastnodeindex => \$max_node_index); |
85 |
} |
86 |
$global_prop_db->set_data (doconnodenumber => \$doc_on_node_number); |
87 |
|
88 |
$self->unlock; |
89 |
|
90 |
return ($last_node_index + 1 .. $max_node_index); |
91 |
} # add_nodes |
92 |
|
93 |
sub create_node ($$) { |
94 |
my ($self, $doc_id) = @_; |
95 |
|
96 |
## In fact we don't need to lock the entire graph (though the |
97 |
## |add_nodes| method might require it anyway), but we don't have |
98 |
## way to lock a particular node only at the moment. |
99 |
$self->lock; |
100 |
|
101 |
my ($node_id) = $self->add_nodes (1); |
102 |
|
103 |
require SWE::Object::Node; |
104 |
my $node = SWE::Object::Node->new (db => $self->db, repo => $self->repo); |
105 |
$node->create (id => $node_id); |
106 |
$node->prop->{ids}->{$doc_id} = 1; |
107 |
$node->save_prop; |
108 |
|
109 |
$self->unlock; |
110 |
|
111 |
return $node; |
112 |
} # create_node |
113 |
|
114 |
sub get_node_by_id ($$) { |
115 |
my ($self, $node_id) = @_; |
116 |
|
117 |
## TODO: cache |
118 |
|
119 |
require SWE::Object::Node; |
120 |
my $node = SWE::Object::Node->new (db => $self->db, repo => $self->repo); |
121 |
$node->load (id => $node_id); |
122 |
|
123 |
return $node; |
124 |
} # get_node_by_id |
125 |
|
126 |
use constant RELATEDNESS_THRESHOLD => 0.9; |
127 |
|
128 |
sub schelling_update ($$) { |
129 |
my ($self, $node_id) = @_; |
130 |
|
131 |
$self->lock; |
132 |
|
133 |
my $node = $self->get_node_by_id ($node_id); |
134 |
my $doc_id = $node->document_id // return; |
135 |
|
136 |
my $repo = $self->repo; |
137 |
|
138 |
my $related = 0; |
139 |
my $n = 0; |
140 |
|
141 |
my $neighbor_ids = $node->neighbor_ids; |
142 |
my $unused_nodes = []; |
143 |
for my $n_node_id (keys %$neighbor_ids) { |
144 |
$n++; |
145 |
my $n_node = $self->get_node_by_id ($n_node_id); |
146 |
my $n_doc_id = $n_node->document_id; |
147 |
unless (defined $n_doc_id) { |
148 |
push @$unused_nodes, $n_node; |
149 |
next; |
150 |
} |
151 |
$related++ if $repo->are_related_ids ($node_id, $n_doc_id); |
152 |
} |
153 |
|
154 |
my $v = 0; |
155 |
if ($n) { |
156 |
$v = $related / $n; |
157 |
} |
158 |
|
159 |
if ($v < RELATEDNESS_THRESHOLD and @$unused_nodes) { |
160 |
my $unused_node = $unused_nodes->[rand @$unused_nodes]; |
161 |
|
162 |
my $doc = $repo->get_document_by_id ($doc_id); |
163 |
$doc->lock; |
164 |
my $id_prop = $doc->untainted_prop; |
165 |
|
166 |
$id_prop->{node_id} = $unused_node->id; |
167 |
$unused_node->prop->{ids}->{$doc_id} = 1; |
168 |
delete $node->prop->{ids}->{$doc_id}; |
169 |
|
170 |
$unused_node->save_prop; |
171 |
$doc->save_prop; |
172 |
$node->save_prop; |
173 |
|
174 |
$doc->unlock; |
175 |
} |
176 |
|
177 |
$self->unlock; |
178 |
} # schelling_update |
179 |
|
180 |
1; |