Mercurial > hg > nsaunier > traffic-intelligence
comparison trafficintelligence/ml.py @ 1054:d13f9bfbf3ff
Retry
| author | Wendlasida |
|---|---|
| date | Fri, 06 Jul 2018 18:42:58 -0400 |
| parents | 75a6ad604cc5 |
| children | ab4c72b9475c |
comparison
equal
deleted
inserted
replaced
| 1053:60cc87e824c4 | 1054:d13f9bfbf3ff |
|---|---|
| 148 features = whiten(features) | 148 features = whiten(features) |
| 149 centroids,distortion = kmeans(features,k, iter) | 149 centroids,distortion = kmeans(features,k, iter) |
| 150 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) | 150 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) |
| 151 return code,sigma | 151 return code,sigma |
| 152 | 152 |
| 153 def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0): | 153 def assignToPrototypeClusters(instances, initialPrototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize = 0): |
| 154 '''Assigns instances to prototypes | 154 '''Assigns instances to prototypes |
| 155 if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters | 155 if minClusterSize is not 0, the clusters will be refined by removing iteratively the smallest clusters |
| 156 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize | 156 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize |
| 157 | 157 |
| 158 labels are indices in the prototypeIndices''' | 158 labels are indices in the prototypeIndices''' |
| 159 if similarityFunc is None: | 159 prototypeIndices = copy(initialPrototypeIndices) |
| 160 print('similarityFunc is None') | |
| 161 return None | |
| 162 | |
| 163 indices = [i for i in range(len(instances)) if i not in prototypeIndices] | 160 indices = [i for i in range(len(instances)) if i not in prototypeIndices] |
| 164 labels = [-1]*len(instances) | 161 labels = [-1]*len(instances) |
| 165 assign = True | 162 assign = True |
| 166 while assign: | 163 while assign: |
| 167 for i in prototypeIndices: | 164 for i in prototypeIndices: |
| 182 if assign: | 179 if assign: |
| 183 prototypeIndices.remove(smallestClusterIndex) | 180 prototypeIndices.remove(smallestClusterIndex) |
| 184 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] | 181 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] |
| 185 return prototypeIndices, labels | 182 return prototypeIndices, labels |
| 186 | 183 |
| 187 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None): | 184 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc, optimizeCentroid = False, randomInitialization = False, initialPrototypeIndices = None): |
| 188 '''Finds exemplar (prototype) instance that represent each cluster | 185 '''Finds exemplar (prototype) instance that represent each cluster |
| 189 Returns the prototype indices (in the instances list) | 186 Returns the prototype indices (in the instances list) |
| 190 | 187 |
| 191 the elements in the instances list must have a length (method __len__), or one can use the optimizeCentroid | 188 the elements in the instances list must have a length (method __len__), or one can use the optimizeCentroid |
| 192 the positions in the instances list corresponds to the similarities | 189 the positions in the instances list corresponds to the similarities |
| 202 initialPrototypeIndices are indices in instances | 199 initialPrototypeIndices are indices in instances |
| 203 | 200 |
| 204 TODO: check how similarity evolves in clusters''' | 201 TODO: check how similarity evolves in clusters''' |
| 205 if len(instances) == 0: | 202 if len(instances) == 0: |
| 206 print('no instances to cluster (empty list)') | 203 print('no instances to cluster (empty list)') |
| 207 return None | |
| 208 if similarityFunc is None: | |
| 209 print('similarityFunc is None') | |
| 210 return None | 204 return None |
| 211 | 205 |
| 212 # sort instances based on length | 206 # sort instances based on length |
| 213 indices = list(range(len(instances))) | 207 indices = list(range(len(instances))) |
| 214 if randomInitialization or optimizeCentroid: | 208 if randomInitialization or optimizeCentroid: |
| 215 indices = np.random.permutation(indices).tolist() | 209 indices = np.random.permutation(indices).tolist() |
| 216 else: | 210 else: |
| 217 def compare(i, j): | |
| 218 if len(instances[i]) > len(instances[j]): | |
| 219 return -1 | |
| 220 elif len(instances[i]) == len(instances[j]): | |
| 221 return 0 | |
| 222 else: | |
| 223 return 1 | |
| 224 indices.sort(key=lambda i: len(instances[i])) | 211 indices.sort(key=lambda i: len(instances[i])) |
| 225 # initialize clusters | 212 # initialize clusters |
| 226 clusters = [] | 213 clusters = [] |
| 227 if initialPrototypeIndices is None: | 214 if initialPrototypeIndices is None: |
| 228 prototypeIndices = [indices[0]] | 215 prototypeIndices = [indices[0]] |
