Mercurial > hg > nsaunier > traffic-intelligence
diff python/ml.py @ 746:e7ff0f60fef8
merged new developments (indicator and trajectory clustering)
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Thu, 10 Sep 2015 15:52:45 -0400 |
| parents | 2472b4d59aea |
| children | 1f2b2d1f4fbf |
line wrap: on
line diff
--- a/python/ml.py Mon Aug 10 01:06:59 2015 -0400 +++ b/python/ml.py Thu Sep 10 15:52:45 2015 -0400 @@ -111,13 +111,73 @@ code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) return code,sigma -def motionPatterLearning(objects, maxDistance): - ''' - Option to use only the (n?) longest features per object instead of all for speed up - TODO''' - pass +def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False): + '''Finds exemplar (prototype) instance that represent each cluster + Returns the prototype indices (in the instances list) and the cluster label of each instance + + the elements in the instances list must have a length (method __len__), or one can use the random initialization + the positions in the instances list corresponds to the similarities + if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed) + similarities must still be allocated with the right size + + if an instance is different enough (<minSimilarity), + it will become a new prototype. + Non-prototype instances will be assigned to an existing prototype + if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters + and reassigning all elements in the cluster until no cluster is smaller than minClusterSize''' -def prototypeCluster(): - ''' - TODO''' - pass + # sort instances based on length + indices = range(len(instances)) + if randomInitialization: + indices = np.random.permutation(indices) + else: + def compare(i, j): + if len(instances[i]) > len(instances[j]): + return -1 + elif len(instances[i]) == len(instances[j]): + return 0 + else: + return 1 + indices.sort(compare) + # go through all instances + prototypeIndices = [indices[0]] + for i in indices[1:]: + if similarityFunc is not None: + for j in prototypeIndices: + if similarities[i][j] < 0: + similarities[i][j] = similarityFunc(instances[i], instances[j]) + similarities[j][i] = similarities[i][j] + if similarities[i][prototypeIndices].max() < minSimilarity: + prototypeIndices.append(i) + + # assignment + indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices] + assign = True + while assign: + labels = [-1]*similarities.shape[0] + for i in prototypeIndices: + labels[i] = i + for i in indices: + if similarityFunc is not None: + for j in prototypeIndices: + if similarities[i][j] < 0: + similarities[i][j] = similarityFunc(instances[i], instances[j]) + similarities[j][i] = similarities[i][j] + prototypeIdx = similarities[i][prototypeIndices].argmax() + if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity: + labels[i] = prototypeIndices[prototypeIdx] + else: + labels[i] = -1 # outlier + clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} + smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) + assign = (clusterSizes[smallestClusterIndex] < minClusterSize) + if assign: + prototypeIndices.remove(smallestClusterIndex) + indices.append(smallestClusterIndex) + + return prototypeIndices, labels + +def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): + clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} + clusterSizes['outlier'] = sum(np.array(labels) == -1) + return clusterSizes
