Mercurial > hg > nsaunier > traffic-intelligence
comparison python/ml.py @ 952:a9b2beef0db4
loading and assigning motion patterns works
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Mon, 24 Jul 2017 21:22:18 -0400 |
| parents | d6c1c05d11f5 |
| children | 989917b1ed85 |
comparison
Legend (row status): equal, deleted, inserted, replaced
| 951:2a4f174879dd | 952:a9b2beef0db4 |
|---|---|
| 165 if assign: | 165 if assign: |
| 166 prototypeIndices.remove(smallestClusterIndex) | 166 prototypeIndices.remove(smallestClusterIndex) |
| 167 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] | 167 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] |
| 168 return prototypeIndices, labels | 168 return prototypeIndices, labels |
| 169 | 169 |
| 170 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, optimizeCentroid = True, randomInitialization = False, assign = True, initialPrototypeIndices = None): | 170 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, optimizeCentroid = True, randomInitialization = False, initialPrototypeIndices = None): |
| 171 '''Finds exemplar (prototype) instance that represent each cluster | 171 '''Finds exemplar (prototype) instance that represent each cluster |
| 172 Returns the prototype indices (in the instances list) and the cluster label of each instance | 172 Returns the prototype indices (in the instances list) |
| 173 | 173 |
| 174 the elements in the instances list must have a length (method __len__), or one can use the random initialization | 174 the elements in the instances list must have a length (method __len__), or one can use the random initialization |
| 175 the positions in the instances list corresponds to the similarities | 175 the positions in the instances list corresponds to the similarities |
| 176 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed) | 176 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed) |
| 177 similarities must still be allocated with the right size | 177 similarities must still be allocated with the right size |
| 234 clusterIndices = clusters[label] | 234 clusterIndices = clusters[label] |
| 235 clusterSimilarities = similarities[clusterIndices][:,clusterIndices] | 235 clusterSimilarities = similarities[clusterIndices][:,clusterIndices] |
| 236 newCentroidIdx = clusterIndices[clusterSimilarities.sum(0).argmax()] | 236 newCentroidIdx = clusterIndices[clusterSimilarities.sum(0).argmax()] |
| 237 if prototypeIndices[label] != newCentroidIdx: | 237 if prototypeIndices[label] != newCentroidIdx: |
| 238 prototypeIndices[label] = newCentroidIdx | 238 prototypeIndices[label] = newCentroidIdx |
| 239 elif randomInitialization: # replace prototype by current instance i if longer | 239 elif len(instances[prototypeIndices[label]]) < len(instances[i]): # replace prototype by current instance i if longer # otherwise, possible to test if randomInitialization or initialPrototypes is not None |
| 240 if len(instances[prototypeIndices[label]]) < len(instances[i]): | 240 prototypeIndices[label] = i |
| 241 prototypeIndices[label] = i | 241 return prototypeIndices |
| 242 | |
| 243 if assign: | |
| 244 return assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize) | |
| 245 else: | |
| 246 return prototypeIndices, None | |
| 247 | 242 |
| 248 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): | 243 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): |
| 249 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | 244 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} |
| 250 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) | 245 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) |
| 251 return clusterSizes | 246 return clusterSizes |
