Beefy Boxes and Bandwidth Generously Provided by pair Networks
Your skill will accomplish
what the force of many cannot
 
PerlMonks  

Re^2: Making sense of data: Clustering OR A coding challenge

by mahesh557 (Novice)
on Jun 07, 2016 at 19:33 UTC ( [id://1165107]=note: print w/replies, xml ) Need Help??


in reply to Re: Making sense of data: Clustering OR A coding challenge
in thread Making sense of data: Clustering OR A coding challenge

The algorithm is generating some FalsePositives, hence added Fine Tuning through iterations

use warnings; use strict; use Data::Dumper; #my @data = map {rand} 1..100; my @dt = (1,2,3,40,40,40,40,42,43,45,80,85,90,91,91,91,91,4,9,10); my @clustercenters = getClusterCenters(3,@dt); @clustercenters = sort { $a <=> $b } @clustercenters; my ($low, $medium, $high) = @clustercenters; my %tags = ( $low => "low", $medium => "medium", $high =>"high", ); print ("\n\n $low \t$medium \t$high\n"); print "\nclosest(12): ", $tags{ closest(12, @clustercenters) }; print "\nclosest(43): ", $tags{ closest(43, @clustercenters) }; print "\n"; sub closest { my ($val,@arr) = @_; my @list = sort { abs($a - $val) <=> abs($b - $val) } @arr; return $list[0]; } sub getClusterCenters{ my ($n, @data) = @_; my $iter = 4; my @centers = (); for (1..$iter){ my @clustercenters = get1DClusterCenters($n,@data); @clustercenters = sort { $a <=> $b } @clustercenters; print "\n",join("\t", @clustercenters); my @tcenters = @clustercenters; for(my $i=0; $i <= $#clustercenters; $i++){ $centers[$i] += +$clustercenters[$i]; } } print "\n",join("\t", @centers ); @centers = map { $_ = $_ / $iter; } @centers; return @centers; } # It takes a 1D array of values and returns centers of clusters sorted sub get1DClusterCenters{ my ($num_clust, @data) = @_; my $tol = 0.001; # stopping tolerance # initialize by choosing random points the data my @center = @data[ map {rand @data} 1..$num_clust ]; my $diff; my @members; my @cluster; do { $diff = 0; # Assign points to nearest center my @cluster; foreach my $point (@data) { my $closest = 0; my $dist = abs $point - $center[ $closest ]; for my $idx (1..$#center) { if (abs $point - $center[ $idx ] < $dist) { $dist = abs $point - $center[ $idx ]; $closest = $idx; } } push @cluster, [$point, $closest]; } # compute new centers foreach my $center_idx (0..$#center) { @members = grep {$_->[1] == $center_idx} @cluster; my $sum = 0; # print "\n\n** group $center_idx \n"; foreach my $member (@members) { # print "\t ",$member->[0]; $sum += $member->[0]; } my $new_center = @members ? $sum / @members : $center[ $ce +nter_idx ]; $diff += abs $center[ $center_idx ] - $new_center; $center[ $center_idx ] = $new_center; } } while ($diff > $tol); #print "Centers are:\n"; my @cluster_means = (); foreach my $center_idx (0..$#center) { #print "\n$center_idx $center[ $center_idx ]\n"; push (@cluster_means, int($center[ $center_idx ]) ); } @cluster_means = sort { $a <=> $b } @cluster_means; # print "\nCLUSTER MEANS: ", join(",", @cluster_means); return @cluster_means; }

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://1165107]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others scrutinizing the Monastery: (2)
As of 2024-04-20 03:52 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found