#!/usr/bin/perl
use warnings;
use strict;

use PDL;


# fcm: fuzzy c-means implementation in Perl
# usage: $fcm [number_of_clusters] [fuzzification_factor]
#        [max_iter] [tolerance]
# returns: prototypes, partition_matrix
# 

#
# reading data
#

# @data collects one array ref of feature values per pattern (one per
# usable input line).  $max_row_number and $max_column_number are declared
# here because the clustering code further down assigns them.
my ( @data, $number_of_patterns, $max_row_number, $max_column_number );

while ( defined( my $line = <DATA> ) ) {
    chomp($line);

    # split ' ' (awk mode) discards leading whitespace, so an indented line
    # does not produce a spurious empty first field the way /\s+/ does.
    my @fields = split ' ', $line;

    # Blank or whitespace-only lines carry no pattern; pushing an empty row
    # would hand pdl() ragged input.
    next unless @fields;

    push @data, [@fields];
}

$number_of_patterns = @data;
die "fcm: no patterns found in the DATA section\n"
    unless $number_of_patterns;

# pdl() receives one array ref per pattern; later code selects pattern $i
# with slice(":,$i").
my $patterns = pdl(@data);

#
# assigning other variables
#
# Resolve the four optional command-line parameters.
#
#   parse_fcm_arguments( number_of_clusters, fuzzification_factor,
#                        max_iter, tolerance )
#
# Any missing (undef) argument falls back to its default: 2, 2.0, 40 and
# 0.00001 respectively.  Negative values are folded to their absolute
# value.  Returns the four resolved values as a list, in the order above.
#
# Dies when a value is not numeric, when number_of_clusters is below 1, or
# when fuzzification_factor is not greater than 1: the membership update
# raises distances to the power -2/(fuzzification_factor - 1), which
# divides by zero at exactly 1 and inverts the membership rule below 1.
sub parse_fcm_arguments {
    my @given = @_;

    # Loaded on demand so the file's top-of-file "use" block stays untouched.
    require Scalar::Util;

    my @parameters = (
        [ number_of_clusters   => 2 ],
        [ fuzzification_factor => 2.0 ],
        [ max_iter             => 40 ],
        [ tolerance            => 0.00001 ],
    );

    my @resolved;
    for my $index ( 0 .. $#parameters ) {
        my ( $name, $default ) = @{ $parameters[$index] };
        my $value = defined $given[$index] ? $given[$index] : $default;

        die "fcm: $name must be a number, got '$value'\n"
            unless Scalar::Util::looks_like_number($value);

        push @resolved, abs($value);
    }

    my ( $clusters, $fuzzification, $iterations, $threshold ) = @resolved;

    # A fractional cluster count has no meaning; keep the integer part.
    $clusters = int($clusters);

    die "fcm: number_of_clusters must be at least 1\n"
        if $clusters < 1;
    die "fcm: fuzzification_factor must be greater than 1\n"
        unless $fuzzification > 1;

    return ( $clusters, $fuzzification, $iterations, $threshold );
}

# Consume up to four leading arguments from @ARGV, exactly as four
# successive shifts would.
my ( $number_of_clusters, $fuzzification_factor, $max_iter, $tolerance ) =
    parse_fcm_arguments( splice( @ARGV, 0, 4 ) );

#
# initializing partition matrices
#
# Partition matrices have dims (number_of_patterns, number_of_clusters);
# the memberships of one pattern across all clusters sum to 1.
my $previous_partition_matrix;
my $current_partition_matrix =
    initialize_partition_matrix( $number_of_clusters, $number_of_patterns );

#
# output variables
#
my $prototypes;
my $performance_index;

#
# fuzzy c means implementation
#

# The membership update raises distance ratios to 2/(m - 1); m must
# therefore be strictly greater than 1.
die "fcm: fuzzification_factor must be greater than 1\n"
    if $fuzzification_factor <= 1;
my $membership_exponent = 2 / ( $fuzzification_factor - 1 );

# Distances are floored at this value before being inverted, so a pattern
# that coincides with a prototype gets (almost) full membership instead of
# producing inf/NaN entries.
my $min_distance = 1e-12;

my $iter = 0;
while (1) {
    # computing each prototype: mean of the patterns weighted by u ** m
    my $temporal_partition_matrix =
        $current_partition_matrix ** $fuzzification_factor;
    my $temp_prototypes =
        mv( $temporal_partition_matrix x $patterns, 1, 0 )
        / sumover($temporal_partition_matrix);
    $prototypes = mv( $temp_prototypes, 1, 0 );

    # copying partition matrix (needed for the convergence test below)
    $previous_partition_matrix = $current_partition_matrix->copy;

    # distance from every pattern $i to every prototype $j
    my $dist = zeroes( $number_of_patterns, $number_of_clusters );
    for my $i ( 0 .. $number_of_patterns - 1 ) {
        for my $j ( 0 .. $number_of_clusters - 1 ) {
            my $temp_distance = distance(
                $patterns->slice(":,$i"),
                $prototypes->slice(":,$j"),
                \&euclidean
            );
            $dist->set( $i, $j, $temp_distance );
        }
    }

    # updating the partition matrix.
    # Mathematically this is d ** (-2/(m-1)) normalised per pattern, but
    # each distance is first expressed relative to the pattern's nearest
    # prototype.  Every ratio is then in (0, 1], so the power cannot
    # overflow even for m close to 1, and the per-pattern sum is at least 1.
    my $safe_dist     = $dist->lclip($min_distance);
    my $nearest       = $safe_dist->mv( 1, 0 )->minimum;
    my $temp_variable = ( $nearest / $safe_dist ) ** $membership_exponent;
    $current_partition_matrix =
        $temp_variable / sumover( mv( $temp_variable, 1, 0 ) );

    #
    # Performance Index calculation
    #
    $temporal_partition_matrix =
        $current_partition_matrix ** $fuzzification_factor;
    $performance_index = sum( $temporal_partition_matrix * ( $dist ** 2 ) );

    # checking stop conditions: stop once the largest absolute membership
    # change falls below the tolerance, or after max_iter iterations.
    my $diff_partition_matrix =
        $current_partition_matrix - $previous_partition_matrix;
    $iter++;
    if (   ( abs($diff_partition_matrix)->max < $tolerance )
        || ( $iter >= $max_iter ) )
    {
        last;
    }
    print "iter = $iter\n";
}

# Final report: a separator, the performance index, then the prototypes
# and the partition matrix (both printed via their own stringification).
print
    "=======================================\n",
    "clustering completed\n",
    "performance index = $performance_index\n",
    "prototypes = \n",
    $prototypes,
    "current partition matrix = \n",
    $current_partition_matrix;


# ================================
# initialize_partition_matrix
# partition_matrix =
#   initialize_partition_matrix(
#     num_clusters, num_patterns)
#
# Draws a random matrix with dims (num_patterns, num_clusters) and
# divides each pattern's entries by their sum over the clusters, so
# the memberships of every pattern add up to 1.
# ================================
sub initialize_partition_matrix {
    my ( $num_clusters, $num_patterns ) = @_;

    my $memberships = random( $num_patterns, $num_clusters );

    # Moving the cluster dimension to the front lets sumover collapse it,
    # leaving one total per pattern.
    my $per_pattern_total = $memberships->mv( 1, 0 )->sumover;

    return $memberships / $per_pattern_total;
}

# ====================================
# compute distance between two vectors
# dist = distance( vector1, vector2, \&type_of_distance )
#
# The difference vector1 - vector2 is handed to the supplied code
# reference, and whatever that callback returns is the result.
# ====================================
sub distance {
    my ( $first, $second, $metric ) = @_;
    return $metric->( $first - $second );
}

# Manhattan metric: sum of the absolute values of the difference vector.
sub manhattan {
    my ($difference) = @_;
    return sum( abs($difference) );
}
# Euclidean metric: square root of the summed squares of the difference
# vector.
sub euclidean {
    my ($difference) = @_;
    return sqrt( sum( $difference ** 2 ) );
}
# Tschebyschev metric: largest absolute value found in the difference
# vector.
sub tschebyschev {
    my ($difference) = @_;
    return max( abs($difference) );
}


__DATA__
4.0	4.0
4.0	5.0
5.0	4.0
5.5	6.0
5.0	5.0
4.5	4.5
5.0	5.5
5.5	5.0
5.0	4.5
4.5	5.0
9.5	9.0
9.0	9.5
8.0	8.0
7.0	8.0
8.0	7.0
8.5	7.0
7.0	8.5
7.0	7.0
7.5	7.0
6.5	8.0
8.0	6.5
6.5	7.0
10.0	10.0
10.0	9.0
10.0	9.0
9.5	10.0
8.0	10.0
9.5	9.5
9.0	9.0
9.0	10.0