# The algorithm produces some false positives, so the result is fine-tuned by
# averaging the cluster centers over several iterations.
use strict;
use warnings;
use Data::Dumper;
use List::Util qw(shuffle);
# Demo driver: cluster a fixed sample into three groups, print the averaged
# centers, then report which group the probe values 12 and 43 fall into.
#my @data = map {rand} 1..100;
my @dt = (
    1,  2,  3,  40, 40, 40, 40, 42, 43, 45,
    80, 85, 90, 91, 91, 91, 91, 4,  9,  10,
);

# Centers in ascending order, so position decides the label.
my @clustercenters = sort { $a <=> $b } ( getClusterCenters(3, @dt) );
my ($low, $medium, $high) = @clustercenters;

# Label lookup keyed by the center value itself.
my %tags;
@tags{ $low, $medium, $high } = ("low", "medium", "high");

print "\n\n $low \t$medium \t$high\n";
my $tag_for_12 = $tags{ closest(12, @clustercenters) };
print "\nclosest(12): ", $tag_for_12;
my $tag_for_43 = $tags{ closest(43, @clustercenters) };
print "\nclosest(43): ", $tag_for_43;
print "\n";
# closest($val, @arr)
#
# Returns the element of @arr whose absolute difference from $val is the
# smallest, or undef when @arr is empty.
sub closest {
    my ($val, @arr) = @_;

    my $nearest;
    my $smallest_gap;
    for my $candidate (@arr) {
        my $gap = abs($candidate - $val);

        # Strict "<" keeps the earliest candidate when two are equally near.
        if (!defined $smallest_gap || $gap < $smallest_gap) {
            $smallest_gap = $gap;
            $nearest      = $candidate;
        }
    }
    return $nearest;
}
# getClusterCenters($n, @data)
#
# Calls get1DClusterCenters($n, @data) four times, prints each run's sorted
# centers on its own tab-separated line, prints the per-position totals, and
# returns the per-position averages over the four runs.
sub getClusterCenters {
    my ($n, @data) = @_;
    my $iter    = 4;
    my @centers = ();

    foreach my $pass (1 .. $iter) {
        my @run = sort { $a <=> $b } ( get1DClusterCenters($n, @data) );
        print "\n", join("\t", @run);

        # Accumulate position by position: smallest with smallest, and so on.
        $centers[$_] += $run[$_] for 0 .. $#run;
    }

    print "\n", join("\t", @centers);

    my @averaged = map { $_ / $iter } @centers;
    return @averaged;
}
# get1DClusterCenters($num_clust, @data)
#
# Clusters a 1-D list of numbers with k-means (assign each point to its
# nearest center, move each center to the mean of its points, repeat until
# the centers stop moving) and returns the cluster centers.
#
#   $num_clust - number of clusters to produce
#   @data      - numeric values to cluster
#
# Returns $num_clust centers, truncated to integers with int() and sorted in
# ascending numeric order. Returns an empty list when @data is empty or
# $num_clust is less than 1. The starting centers are picked at random, so
# repeated calls on the same data may return different centers.
sub get1DClusterCenters {
    my ($num_clust, @data) = @_;
    return if !@data || $num_clust < 1;

    my $tol = 0.001;    # stop once the centers move less than this in total

    # Seed with distinct data values. Sampling with replacement could start
    # two centers on the same value, leaving one of them with no members.
    my %seen;
    my @center = shuffle( grep { !$seen{$_}++ } @data );
    splice @center, $num_clust if @center > $num_clust;

    # Fewer distinct values than clusters: pad with random points so the
    # caller still receives exactly $num_clust centers.
    push @center, $data[ int rand @data ] while @center < $num_clust;

    my $diff;
    do {
        $diff = 0;

        # Assign each point to its nearest center, keeping a running sum and
        # member count per center. Ties go to the lower-numbered center.
        my @sum   = (0) x @center;
        my @count = (0) x @center;
        foreach my $point (@data) {
            my $closest = 0;
            my $dist    = abs( $point - $center[$closest] );
            for my $idx (1 .. $#center) {
                my $candidate = abs( $point - $center[$idx] );
                if ($candidate < $dist) {
                    $dist    = $candidate;
                    $closest = $idx;
                }
            }
            $sum[$closest] += $point;
            $count[$closest]++;
        }

        # Move each center to the mean of its members; a center with no
        # members stays where it is.
        foreach my $center_idx (0 .. $#center) {
            my $new_center = $count[$center_idx]
                ? $sum[$center_idx] / $count[$center_idx]
                : $center[$center_idx];
            $diff += abs( $center[$center_idx] - $new_center );
            $center[$center_idx] = $new_center;
        }
    } while ($diff > $tol);

    my @cluster_means = sort { $a <=> $b } map { int($_) } @center;
    return @cluster_means;
}
|