# HG changeset patch
# User Nicolas Saunier <nicolas.saunier@polymtl.ca>
# Date 1519075979 18000
# Node ID 23f98ebb113fa07df5b714a9618b54c0b71cdfbd
# Parent  cc89267b5ff942ad4db8e9cb705c1b5b4ebff328
first tests for clustering algo

diff -r cc89267b5ff9 -r 23f98ebb113f python/ml.py
--- a/python/ml.py	Mon Feb 19 10:47:19 2018 -0500
+++ b/python/ml.py	Mon Feb 19 16:32:59 2018 -0500
@@ -156,8 +156,14 @@
 
 def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0):
     '''Assigns instances to prototypes 
-    if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
-    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
+    if minClusterSize is non-zero, the clusters will be refined by iteratively removing the smallest cluster
+    and reassigning all elements in the cluster until no cluster is smaller than minClusterSize
+
+    each label is the instance index of the assigned prototype (an element of prototypeIndices), or -1 for an outlier'''
+    if similarityFunc is None:
+        print('similarityFunc is None')
+        return None
+
     indices = [i for i in range(len(instances)) if i not in prototypeIndices]
     labels = [-1]*len(instances)
     assign = True
@@ -165,14 +171,13 @@
         for i in prototypeIndices:
             labels[i] = i
         for i in indices:
-            if similarityFunc is not None:
-                for j in prototypeIndices:
-                    if similarities[i][j] < 0:
-                        similarities[i][j] = similarityFunc(instances[i], instances[j])
-                        similarities[j][i] = similarities[i][j]
-            prototypeIdx = similarities[i][prototypeIndices].argmax()
-            if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
-                labels[i] = prototypeIndices[prototypeIdx]
+            for j in prototypeIndices:
+                if similarities[i][j] < 0:
+                    similarities[i][j] = similarityFunc(instances[i], instances[j])
+                    similarities[j][i] = similarities[i][j]
+            label = similarities[i][prototypeIndices].argmax()
+            if similarities[i][prototypeIndices[label]] >= minSimilarity:
+                labels[i] = prototypeIndices[label]
             else:
                 labels[i] = -1 # outlier
         clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
@@ -182,11 +187,12 @@
             prototypeIndices.remove(smallestClusterIndex)
             indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex]
     return prototypeIndices, labels
-def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = True, randomInitialization = False, initialPrototypeIndices = None):
+
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None):
     '''Finds exemplar (prototype) instance that represent each cluster
     Returns the prototype indices (in the instances list)
 
-    the elements in the instances list must have a length (method __len__), or one can use the random initialization
+    the elements in the instances list must have a length (method __len__), unless randomInitialization or optimizeCentroid is used
     the positions in the instances list corresponds to the similarities
     if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed)
     similarities must still be allocated with the right size
@@ -210,7 +216,7 @@
     # sort instances based on length
     indices = range(len(instances))
     if randomInitialization or optimizeCentroid:
-        indices = np.random.permutation(indices)
+        indices = np.random.permutation(indices).tolist()
     else:
         def compare(i, j):
             if len(instances[i]) > len(instances[j]):
@@ -220,7 +226,7 @@
             else:
                 return 1
         indices.sort(compare)
-    # go through all instances
+    # initialize clusters
     clusters = []
     if initialPrototypeIndices is None:
         prototypeIndices = [indices[0]]
@@ -229,12 +235,13 @@
     for i in prototypeIndices:
         clusters.append([i])
         indices.remove(i)
+    # go through all instances
     for i in indices:
         for j in prototypeIndices:
             if similarities[i][j] < 0:
                 similarities[i][j] = similarityFunc(instances[i], instances[j])
                 similarities[j][i] = similarities[i][j]
-        label = similarities[i][prototypeIndices].argmax()
+        label = similarities[i][prototypeIndices].argmax() # index in prototypeIndices
         if similarities[i][prototypeIndices[label]] < minSimilarity:
             prototypeIndices.append(i)
             clusters.append([])
@@ -313,3 +320,11 @@
         ywidth = 0.5*(maxima[1]-minima[1])
         plt.xlim(minima[0]-xwidth,maxima[0]+xwidth)
         plt.ylim(minima[1]-ywidth,maxima[1]+ywidth)
+
+if __name__ == "__main__":
+    import doctest
+    import unittest
+    suite = doctest.DocFileSuite('tests/ml.txt')
+    unittest.TextTestRunner().run(suite)
+#     #doctest.testmod()
+#     #doctest.testfile("example.txt")
diff -r cc89267b5ff9 -r 23f98ebb113f python/tests/ml.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/tests/ml.txt	Mon Feb 19 16:32:59 2018 -0500
@@ -0,0 +1,11 @@
+>>> from math import fabs
+>>> from numpy import ones
+>>> from ml import prototypeCluster
+
+>>> nTrajectories = 7
+>>> similarityFunc = lambda x, y: 1.-fabs(x-y)/(nTrajectories-1)
+>>> similarities = -ones((nTrajectories, nTrajectories))
+>>> prototypeIndices = prototypeCluster(range(nTrajectories), similarities, 1., similarityFunc, optimizeCentroid = True) # too large to be similar
+>>> len(prototypeIndices) == nTrajectories
+True
+>>> # could use lists to have a length
diff -r cc89267b5ff9 -r 23f98ebb113f scripts/learn-motion-patterns.py
--- a/scripts/learn-motion-patterns.py	Mon Feb 19 10:47:19 2018 -0500
+++ b/scripts/learn-motion-patterns.py	Mon Feb 19 16:32:59 2018 -0500
@@ -76,8 +76,7 @@
 # the next line can be called again without reinitializing similarities
 if args.learn:
     prototypeIndices = ml.prototypeCluster(trajectories, similarities, args.minSimilarity, similarityFunc, args.optimizeCentroid, args.randomInitialization, initialPrototypeIndices)
-# assignment is done if explicitly passed as argument or if working on the same database (starting prototypes from scratch and assigning the )
-# (otherwise the matchings will not compare and one has to to matchings on a large scale at once)
+# assignment is done if explicitly passed as argument or if working on the same database (starting prototypes from scratch and assigning them)
 else:
     prototypeIndices = initialPrototypeIndices