| 1 |
wakaba |
1.1 |
package SWE::Object::Repository; |
| 2 |
|
|
use strict; |
| 3 |
|
|
use warnings; |
| 4 |
wakaba |
1.4 |
use Scalar::Util qw/weaken/; |
| 5 |
wakaba |
1.1 |
|
| 6 |
|
|
## Constructor.
##
## Arguments: the class name followed by a list of name/value pairs.
## The pairs become the fields of the new object verbatim; no
## validation or defaulting is performed here.
##
## Returns the newly blessed object.
sub new ($%) {
  my ($class, @fields) = @_;

  my $repo = {@fields};
  bless $repo, $class;

  return $repo;
} # new
| 12 |
|
|
|
| 13 |
|
|
## Accessor: returns whatever is stored in the object's |db| field
## (undef if the field was never set).
sub db ($) {
  my $self = shift;
  return $self->{db};
} # db
| 14 |
|
|
|
| 15 |
wakaba |
1.4 |
## Returns the |SWE::Object::Graph| object belonging to this
## repository, creating it on first use.
##
## The graph is constructed with |repo| (this object) and |db| (the
## value of |$self->db|) and then cached in the |graph| field.  The
## graph's |repo| reference is weakened so that the repository and
## the graph do not keep each other alive.
sub graph ($) {
  my $self = shift;

  unless ($self->{graph}) {
    require SWE::Object::Graph;
    my $graph = SWE::Object::Graph->new (repo => $self, db => $self->db);
    weaken $graph->{repo};
    $self->{graph} = $graph;
  }

  return $self->{graph};
} # graph
| 24 |
|
|
|
| 25 |
|
|
## Returns the |SWE::Object::Document| object for the given document
## ID, creating it on first request.
##
## Arguments: the document ID.
##
## Documents are cached per ID in the |document| field, so repeated
## calls with the same ID return the same object.  A new document is
## constructed with |repo|, |db| and |id|; its |repo| reference is
## weakened to avoid a reference cycle with this repository.
sub get_document_by_id ($$) {
  my ($self, $doc_id) = @_;

  my $cached = $self->{document}->{$doc_id};
  return $cached if $cached;

  require SWE::Object::Document;
  my $document = SWE::Object::Document->new
      (repo => $self, db => $self->db, id => $doc_id);
  weaken $document->{repo};

  return $self->{document}->{$doc_id} = $document;
} # get_document_by_id
| 36 |
|
|
|
| 37 |
wakaba |
1.5 |
## ------ The Term Weight Vector ------ |
| 38 |
wakaba |
1.1 |
|
| 39 |
wakaba |
1.5 |
## Acquires the term weight vector lock.  Calls may be nested: only
## the outermost call actually takes the lock, and inner calls just
## increase the nesting depth kept in the |weight_lock_n| field.
## Each call is meant to be paired with a |weight_unlock| call.
##
## The lock object (|SWE::DB::Lock|, lock type |Weight|, file
## |weight.lock| under the database's global directory) is created
## on first use and kept in the |weight_lock| field.
##
## The nesting depth is recorded only AFTER the lock has been
## acquired.  If |lock| throws, the depth is left untouched, so a
## later call tries to acquire the lock again instead of wrongly
## assuming it is already held.  A depth of zero or below is treated
## as "not held".
##
## Returns the result of the underlying |lock| call when the lock is
## actually taken and a false value for nested calls.
sub weight_lock () {
  my $self = shift;

  my $depth = $self->{weight_lock_n} || 0;
  if ($depth > 0) {
    ## Already held by an outer call; just go one level deeper.
    $self->{weight_lock_n} = $depth + 1;
    return !1;
  }

  my $lock = $self->{weight_lock} ||= do {
    require SWE::DB::Lock;
    my $new_lock = SWE::DB::Lock->new;
    $new_lock->{file_name} = $self->db->global_dir_name . 'weight.lock';
    $new_lock->lock_type ('Weight');
    $new_lock;
  };

  ## May die; the depth must stay unchanged in that case.
  my $result = $lock->lock;
  $self->{weight_lock_n} = 1;
  return $result;
} # weight_lock
| 54 |
wakaba |
1.1 |
|
| 55 |
wakaba |
1.5 |
## Releases one level of the term weight vector lock taken by
## |weight_lock|.  The underlying lock object (|weight_lock| field) is
## unlocked when the nesting depth (|weight_lock_n| field) reaches
## zero.
##
## The depth never drops below zero.  An unbalanced call (more
## unlocks than locks) still invokes |unlock| on the lock object, if
## there is one, but leaves the depth at zero; a negative depth would
## make the next |weight_lock| call skip acquiring the lock.
sub weight_unlock () {
  my $self = shift;

  my $depth = ($self->{weight_lock_n} || 0) - 1;
  $depth = 0 if $depth < 0;
  $self->{weight_lock_n} = $depth;

  if ($depth == 0 and $self->{weight_lock}) {
    $self->{weight_lock}->unlock;
  }
} # weight_unlock
| 62 |
|
|
|
| 63 |
|
|
## Returns the term weight vector, loading it on first use.
##
## The vector is parsed by |SWE::Data::FeatureVector| from the
## |termweightvector| entry of the database's global property store
## (|$self->db->global_prop|); if that entry yields a false value, an
## empty string is parsed instead.  The result is cached in the
## |term_weight_vector| field, and loading clears the
## |term_weight_vector_modified| flag.
sub term_weight_vector ($) {
  my $self = shift;

  return $self->{term_weight_vector} if $self->{term_weight_vector};

  require SWE::Data::FeatureVector;

  my $stored = $self->db->global_prop->get_data ('termweightvector');
  my $vector = SWE::Data::FeatureVector->parse_stringref ($stored || \ '');

  ## A freshly loaded vector has no unsaved changes.
  delete $self->{term_weight_vector_modified};

  $self->{term_weight_vector} = $vector;
  return $vector;
} # term_weight_vector
| 76 |
|
|
|
| 77 |
|
|
## Writes the in-memory term weight vector back to the database.
##
## Does nothing unless the |term_weight_vector_modified| flag is set.
## Otherwise the stringified vector is stored, as a string reference,
## under the |termweightvector| key of the global property store
## (|$self->db->global_prop|).
##
## NOTE(review): the modified flag is not cleared here, so every
## later call writes the vector again until it is reloaded -- confirm
## whether that is intended.
sub save_term_weight_vector ($) {
  my $self = shift;

  $self->{term_weight_vector_modified} or return;

  $self->db->global_prop->set_data
      (termweightvector => \($self->{term_weight_vector}->stringify));
} # save_term_weight_vector
| 85 |
|
|
|
| 86 |
|
|
## Decides whether two documents are related and, optionally, trains
## the term weight vector with a known answer.
##
## Arguments:
##   $id1, $id2 - IDs whose feature vectors are read from the
##                database's |id_tfidf| store.
##   $answer    - Optional.  The expected result: a positive number
##                if the pair is related, a negative number if not.
##
## Returns |undef| if either ID has no stored feature vector.
## Otherwise returns true if the documents are classified as related
## and false if not.
##
## Classification: the difference of the two feature vectors is
## multiplied by the term weight vector; the pair counts as related
## when the component sum of the product is zero or greater.
##
## Training: while the classification contradicts |$answer|, the
## difference vector is subtracted from (false positive) or added to
## (false negative) the weight vector, at most 21 times.  Each update
## replaces the |term_weight_vector| field and sets the
## |term_weight_vector_modified| flag; nothing is saved here.
##
## The classification is recomputed after every update, including
## the last permitted one, so the returned value always matches the
## weight vector that was stored.
sub are_related_ids ($$$;$) {
  my ($self, $id1, $id2, $answer) = @_;

  my $w = $self->term_weight_vector;

  my $tfidf_db = $self->db->id_tfidf;

  ## TODO: cache

  require SWE::Data::FeatureVector;
  my $fv1 = SWE::Data::FeatureVector->parse_stringref
      ($tfidf_db->get_data ($id1) // return undef);
  my $fv2 = SWE::Data::FeatureVector->parse_stringref
      ($tfidf_db->get_data ($id2) // return undef);

  my $diff = $fv1->subtract ($fv2);

  ## +1 (related) or -1 (not related) for the given weight vector.
  my $classify = sub {
    my $weights = shift;
    return $diff->multiply ($weights)->component_sum >= 0 ? 1 : -1;
  };

  my $y = $classify->($w);

  if (defined $answer) {
    my $max_updates = 21;
    my $updates = 0;
    while ($y * $answer < 0 and $updates < $max_updates) {
      $w = $y > 0 ? $w->subtract ($diff) : $w->add ($diff);
      $self->{term_weight_vector} = $w;
      $self->{term_weight_vector_modified} = 1;
      $updates++;

      ## Re-evaluate with the updated weights so that $y is never
      ## stale when the loop ends.
      $y = $classify->($w);
    }
  }

  return $y > 0;
} # are_related_ids
| 119 |
|
|
|
| 120 |
|
|
1; |