nsaunier/traffic-intelligence: python/ml.py comparison

comparison python/ml.py @ 746:e7ff0f60fef8

merged new developments (indicator and trajectory clustering)

author	Nicolas Saunier <nicolas.saunier@polymtl.ca>
date	Thu, 10 Sep 2015 15:52:45 -0400
parents	2472b4d59aea
children	1f2b2d1f4fbf

comparison

equal deleted inserted replaced

-:c6d4ea05a2d0
+:e7ff0f60fef8
 	features = whiten(features)
 	centroids,distortion = kmeans(features,k, iter)
 	code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
 	return code,sigma
-def motionPatterLearning(objects, maxDistance):
+def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False):
-'''
+'''Finds exemplar (prototype) instance that represent each cluster
-Option to use only the (n?) longest features per object instead of all for speed up
+Returns the prototype indices (in the instances list) and the cluster label of each instance
-TODO'''
-pass
-def prototypeCluster():
+the elements in the instances list must have a length (method __len__), or one can use the random initialization
-'''
+the positions in the instances list corresponds to the similarities
-TODO'''
+if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed)
-pass
+similarities must still be allocated with the right size
+if an instance is different enough (<minSimilarity),
+it will become a new prototype.
+Non-prototype instances will be assigned to an existing prototype
+if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
+and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
+# sort instances based on length
+indices = range(len(instances))
+if randomInitialization:
+indices = np.random.permutation(indices)
+else:
+def compare(i, j):
+if len(instances[i]) > len(instances[j]):
+return -1
+elif len(instances[i]) == len(instances[j]):
+return 0
+else:
+return 1
+indices.sort(compare)
+# go through all instances
+prototypeIndices = [indices[0]]
+for i in indices[1:]:
+if similarityFunc is not None:
+for j in prototypeIndices:
+if similarities[i][j] < 0:
+similarities[i][j] = similarityFunc(instances[i], instances[j])
+similarities[j][i] = similarities[i][j]
+if similarities[i][prototypeIndices].max() < minSimilarity:
+prototypeIndices.append(i)
+# assignment
+indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices]
+assign = True
+while assign:
+labels = [-1]*similarities.shape[0]
+for i in prototypeIndices:
+labels[i] = i
+for i in indices:
+if similarityFunc is not None:
+for j in prototypeIndices:
+if similarities[i][j] < 0:
+similarities[i][j] = similarityFunc(instances[i], instances[j])
+similarities[j][i] = similarities[i][j]
+prototypeIdx = similarities[i][prototypeIndices].argmax()
+if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
+labels[i] = prototypeIndices[prototypeIdx]
+else:
+labels[i] = -1 # outlier
+clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
+smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
+assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
+if assign:
+prototypeIndices.remove(smallestClusterIndex)
+indices.append(smallestClusterIndex)
+return prototypeIndices, labels
+def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
+clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
+clusterSizes['outlier'] = sum(np.array(labels) == -1)
+return clusterSizes

Mercurial > hg > nsaunier > traffic-intelligence

comparison python/ml.py @ 746:e7ff0f60fef8