Mercurial > hg > nsaunier > traffic-intelligence
comparison python/ml.py @ 746:e7ff0f60fef8
merged new developments (indicator and trajectory clustering)
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Thu, 10 Sep 2015 15:52:45 -0400 |
| parents | 2472b4d59aea |
| children | 1f2b2d1f4fbf |
comparison
equal
deleted
inserted
replaced
| 727:c6d4ea05a2d0 | 746:e7ff0f60fef8 |
|---|---|
| 109 features = whiten(features) | 109 features = whiten(features) |
| 110 centroids,distortion = kmeans(features,k, iter) | 110 centroids,distortion = kmeans(features,k, iter) |
| 111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) | 111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) |
| 112 return code,sigma | 112 return code,sigma |
| 113 | 113 |
| 114 def motionPatterLearning(objects, maxDistance): | 114 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False): |
| 115 ''' | 115 '''Finds exemplar (prototype) instance that represent each cluster |
| 116 Option to use only the (n?) longest features per object instead of all for speed up | 116 Returns the prototype indices (in the instances list) and the cluster label of each instance |
| 117 TODO''' | |
| 118 pass | |
| 119 | 117 |
| 120 def prototypeCluster(): | 118 the elements in the instances list must have a length (method __len__), or one can use the random initialization |
| 121 ''' | 119 the positions in the instances list corresponds to the similarities |
| 122 TODO''' | 120 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed) |
| 123 pass | 121 similarities must still be allocated with the right size |
| 122 | |
| 123 if an instance is different enough (<minSimilarity), | |
| 124 it will become a new prototype. | |
| 125 Non-prototype instances will be assigned to an existing prototype | |
| 126 if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters | |
| 127 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize''' | |
| 128 | |
| 129 # sort instances based on length | |
| 130 indices = range(len(instances)) | |
| 131 if randomInitialization: | |
| 132 indices = np.random.permutation(indices) | |
| 133 else: | |
| 134 def compare(i, j): | |
| 135 if len(instances[i]) > len(instances[j]): | |
| 136 return -1 | |
| 137 elif len(instances[i]) == len(instances[j]): | |
| 138 return 0 | |
| 139 else: | |
| 140 return 1 | |
| 141 indices.sort(compare) | |
| 142 # go through all instances | |
| 143 prototypeIndices = [indices[0]] | |
| 144 for i in indices[1:]: | |
| 145 if similarityFunc is not None: | |
| 146 for j in prototypeIndices: | |
| 147 if similarities[i][j] < 0: | |
| 148 similarities[i][j] = similarityFunc(instances[i], instances[j]) | |
| 149 similarities[j][i] = similarities[i][j] | |
| 150 if similarities[i][prototypeIndices].max() < minSimilarity: | |
| 151 prototypeIndices.append(i) | |
| 152 | |
| 153 # assignment | |
| 154 indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices] | |
| 155 assign = True | |
| 156 while assign: | |
| 157 labels = [-1]*similarities.shape[0] | |
| 158 for i in prototypeIndices: | |
| 159 labels[i] = i | |
| 160 for i in indices: | |
| 161 if similarityFunc is not None: | |
| 162 for j in prototypeIndices: | |
| 163 if similarities[i][j] < 0: | |
| 164 similarities[i][j] = similarityFunc(instances[i], instances[j]) | |
| 165 similarities[j][i] = similarities[i][j] | |
| 166 prototypeIdx = similarities[i][prototypeIndices].argmax() | |
| 167 if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity: | |
| 168 labels[i] = prototypeIndices[prototypeIdx] | |
| 169 else: | |
| 170 labels[i] = -1 # outlier | |
| 171 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | |
| 172 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) | |
| 173 assign = (clusterSizes[smallestClusterIndex] < minClusterSize) | |
| 174 if assign: | |
| 175 prototypeIndices.remove(smallestClusterIndex) | |
| 176 indices.append(smallestClusterIndex) | |
| 177 | |
| 178 return prototypeIndices, labels | |
| 179 | |
| 180 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): | |
| 181 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | |
| 182 clusterSizes['outlier'] = sum(np.array(labels) == -1) | |
| 183 return clusterSizes |
