| 1 |
wakaba |
1.1 |
package SWE::Object::Repository; |
| 2 |
|
|
use strict; |
| 3 |
|
|
use warnings; |
| 4 |
|
|
|
| 5 |
|
|
sub new ($%) { |
| 6 |
|
|
my $class = shift; |
| 7 |
|
|
my $self = bless {@_}, $class; |
| 8 |
|
|
|
| 9 |
|
|
return $self; |
| 10 |
|
|
} |
| 11 |
|
|
|
| 12 |
|
|
sub db ($) { $_[0]->{db} } |
| 13 |
|
|
|
| 14 |
|
|
my $weight_file_name = 'data/weight.txt'; |
| 15 |
|
|
|
| 16 |
|
|
sub term_weight_vector ($) { |
| 17 |
|
|
my $self = shift; |
| 18 |
|
|
|
| 19 |
|
|
## TODO: lock |
| 20 |
|
|
|
| 21 |
|
|
## TODO: use global props |
| 22 |
|
|
|
| 23 |
|
|
return $self->{term_weight_vector} ||= do { |
| 24 |
|
|
require SWE::Data::FeatureVector; |
| 25 |
|
|
|
| 26 |
|
|
my $w; |
| 27 |
|
|
if (-f $weight_file_name) { |
| 28 |
|
|
local $/ = undef; |
| 29 |
|
|
open my $file, '<:encoding(utf8)', $weight_file_name or die "$0: $weight_file_name: $!"; |
| 30 |
|
|
$w = SWE::Data::FeatureVector->parse_stringref (\<$file>); |
| 31 |
|
|
close $file; |
| 32 |
|
|
} else { |
| 33 |
|
|
$w = SWE::Data::FeatureVector->new; |
| 34 |
|
|
} |
| 35 |
|
|
delete $self->{term_weight_vector_modified}; |
| 36 |
|
|
$w; |
| 37 |
|
|
}; |
| 38 |
|
|
} # term_weight_vector |
| 39 |
|
|
|
| 40 |
|
|
sub save_term_weight_vector ($) { |
| 41 |
|
|
my $self = shift; |
| 42 |
|
|
return unless $self->{term_weight_vector_modified}; |
| 43 |
|
|
|
| 44 |
|
|
## TODO: use global props |
| 45 |
|
|
|
| 46 |
|
|
open my $file, '>:encoding(utf8)', $weight_file_name or die "$0: $weight_file_name: $!"; |
| 47 |
|
|
print $file $self->{term_weight_vector}->stringify; |
| 48 |
|
|
close $file; |
| 49 |
|
|
} # save_term_weight_vector |
| 50 |
|
|
|
| 51 |
|
|
sub are_related_ids ($$$;$) { |
| 52 |
|
|
my ($self, $id1, $id2, $answer) = @_; |
| 53 |
|
|
|
| 54 |
|
|
my $w = $self->term_weight_vector; |
| 55 |
|
|
|
| 56 |
|
|
my $tfidf_db = $self->db->id_tfidf; |
| 57 |
wakaba |
1.2 |
|
| 58 |
|
|
## TODO: cache |
| 59 |
wakaba |
1.1 |
|
| 60 |
|
|
require SWE::Data::FeatureVector; |
| 61 |
|
|
my $fv1 = SWE::Data::FeatureVector->parse_stringref |
| 62 |
|
|
($tfidf_db->get_data ($id1)); |
| 63 |
|
|
my $fv2 = SWE::Data::FeatureVector->parse_stringref |
| 64 |
|
|
($tfidf_db->get_data ($id2)); |
| 65 |
|
|
|
| 66 |
|
|
my $diff = $fv1->subtract ($fv2); |
| 67 |
wakaba |
1.2 |
|
| 68 |
|
|
my $i = 0; |
| 69 |
|
|
A: { |
| 70 |
|
|
my $wx = $diff->multiply ($w)->component_sum; |
| 71 |
|
|
my $y = $wx >= 0 ? 1 : -1; |
| 72 |
|
|
|
| 73 |
|
|
if (defined $answer and $y * $answer < 0) { |
| 74 |
|
|
$w = $y > 0 ? $w->subtract ($diff) : $w->add ($diff); |
| 75 |
|
|
$self->{term_weight_vector} = $w; |
| 76 |
|
|
$self->{term_weight_vector_modified} = 1; |
| 77 |
|
|
$i++; |
| 78 |
|
|
redo A unless $i > 20; |
| 79 |
|
|
} |
| 80 |
|
|
|
| 81 |
|
|
return $y > 0; |
| 82 |
wakaba |
1.1 |
} |
| 83 |
|
|
} # are_related_ids |
| 84 |
|
|
|
| 85 |
|
|
1; |