Mercurial > hg > nsaunier > traffic-intelligence
comparison python/ml.py @ 907:9fd7b18f75b4
re arranged motion pattern learning
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Fri, 23 Jun 2017 23:50:02 -0400 |
| parents | 8e8ec4ece66e |
| children | b297525b2cbf |
comparison
equal
deleted
inserted
replaced
| 906:a57e6fbcd8e3 | 907:9fd7b18f75b4 |
|---|---|
| 112 return centroids | 112 return centroids |
| 113 | 113 |
| 114 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements | 114 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements |
| 115 | 115 |
| 116 def spectralClustering(similarityMatrix, k, iter=20): | 116 def spectralClustering(similarityMatrix, k, iter=20): |
| 117 '''Spectral Clustering algorithm''' | 117 '''Spectral Clustering algorithm''' |
| 118 n = len(similarityMatrix) | 118 n = len(similarityMatrix) |
| 119 # create Laplacian matrix | 119 # create Laplacian matrix |
| 120 rowsum = np.sum(similarityMatrix,axis=0) | 120 rowsum = np.sum(similarityMatrix,axis=0) |
| 121 D = np.diag(1 / np.sqrt(rowsum)) | 121 D = np.diag(1 / np.sqrt(rowsum)) |
| 122 I = np.identity(n) | 122 I = np.identity(n) |
| 123 L = I - np.dot(D,np.dot(similarityMatrix,D)) | 123 L = I - np.dot(D,np.dot(similarityMatrix,D)) |
| 124 # compute eigenvectors of L | 124 # compute eigenvectors of L |
| 125 U,sigma,V = np.linalg.svd(L) | 125 U,sigma,V = np.linalg.svd(L) |
| 126 # create feature vector from k first eigenvectors | 126 # create feature vector from k first eigenvectors |
| 127 # by stacking eigenvectors as columns | 127 # by stacking eigenvectors as columns |
| 128 features = np.array(V[:k]).T | 128 features = np.array(V[:k]).T |
| 129 # k-means | 129 # k-means |
| 130 features = whiten(features) | 130 features = whiten(features) |
| 131 centroids,distortion = kmeans(features,k, iter) | 131 centroids,distortion = kmeans(features,k, iter) |
| 132 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) | 132 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) |
| 133 return code,sigma | 133 return code,sigma |
| 134 | 134 |
| 135 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False): | 135 def assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc = None, minClusterSize = None): |
| 136 '''Finds exemplar (prototype) instance that represent each cluster | 136 '''Assigns instances to prototypes |
| 137 Returns the prototype indices (in the instances list) and the cluster label of each instance | |
| 138 | |
| 139 the elements in the instances list must have a length (method __len__), or one can use the random initialization | |
| 140 the positions in the instances list corresponds to the similarities | |
| 141 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed) | |
| 142 similarities must still be allocated with the right size | |
| 143 | |
| 144 if an instance is different enough (<minSimilarity), | |
| 145 it will become a new prototype. | |
| 146 Non-prototype instances will be assigned to an existing prototype | |
| 147 if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters | 137 if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters |
| 148 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize | 138 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize''' |
| 149 | 139 indices = [i for i in range(len(instances)) if i not in prototypeIndices] |
| 150 TODO: at each step, optimize the prototype as the most similar in its current cluster (can be done easily if similarities are already computed)''' | 140 labels = [-1]*len(instances) |
| 151 | |
| 152 # sort instances based on length | |
| 153 if len(instances) == 0: | |
| 154 print('no instances to cluster (empty list)') | |
| 155 return None | |
| 156 | |
| 157 indices = range(len(instances)) | |
| 158 if randomInitialization: | |
| 159 indices = np.random.permutation(indices) | |
| 160 else: | |
| 161 def compare(i, j): | |
| 162 if len(instances[i]) > len(instances[j]): | |
| 163 return -1 | |
| 164 elif len(instances[i]) == len(instances[j]): | |
| 165 return 0 | |
| 166 else: | |
| 167 return 1 | |
| 168 indices.sort(compare) | |
| 169 # go through all instances | |
| 170 prototypeIndices = [indices[0]] | |
| 171 for i in indices[1:]: | |
| 172 if similarityFunc is not None: | |
| 173 for j in prototypeIndices: | |
| 174 if similarities[i][j] < 0: | |
| 175 similarities[i][j] = similarityFunc(instances[i], instances[j]) | |
| 176 similarities[j][i] = similarities[i][j] | |
| 177 if similarities[i][prototypeIndices].max() < minSimilarity: | |
| 178 prototypeIndices.append(i) | |
| 179 elif randomInitialization: # replace prototype by current instance i if longer | |
| 180 label = similarities[i][prototypeIndices].argmax() | |
| 181 if len(instances[prototypeIndices[label]]) < len(instances[i]): | |
| 182 prototypeIndices[label] = i | |
| 183 | |
| 184 # assignment | |
| 185 indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices] | |
| 186 assign = True | 141 assign = True |
| 187 while assign: | 142 while assign: |
| 188 labels = [-1]*similarities.shape[0] | |
| 189 for i in prototypeIndices: | 143 for i in prototypeIndices: |
| 190 labels[i] = i | 144 labels[i] = i |
| 191 for i in indices: | 145 for i in indices: |
| 192 if similarityFunc is not None: | 146 if similarityFunc is not None: |
| 193 for j in prototypeIndices: | 147 for j in prototypeIndices: |
| 202 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | 156 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} |
| 203 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) | 157 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get) |
| 204 assign = (clusterSizes[smallestClusterIndex] < minClusterSize) | 158 assign = (clusterSizes[smallestClusterIndex] < minClusterSize) |
| 205 if assign: | 159 if assign: |
| 206 prototypeIndices.remove(smallestClusterIndex) | 160 prototypeIndices.remove(smallestClusterIndex) |
| 207 indices.append(smallestClusterIndex) | 161 indices = [i for i in range(similarities.shape[0]) if labels[i] == smallestClusterIndex] |
| 208 | |
| 209 return prototypeIndices, labels | 162 return prototypeIndices, labels |
| 163 | |
| 164 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = 0, randomInitialization = False, assign = True, initialPrototypeIndices = None): | |
| 165 '''Finds exemplar (prototype) instance that represent each cluster | |
| 166 Returns the prototype indices (in the instances list) and the cluster label of each instance | |
| 167 | |
| 168 the elements in the instances list must have a length (method __len__), or one can use the random initialization | |
| 169 the positions in the instances list corresponds to the similarities | |
| 170 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed) | |
| 171 similarities must still be allocated with the right size | |
| 172 | |
| 173 if an instance is different enough (<minSimilarity), | |
| 174 it will become a new prototype. | |
| 175 Non-prototype instances will be assigned to an existing prototype | |
| 176 | |
| 177 TODO: at each step, optimize the prototype as the most similar in its current cluster (can be done easily if similarities are already computed)''' | |
| 178 | |
| 179 # sort instances based on length | |
| 180 if len(instances) == 0: | |
| 181 print('no instances to cluster (empty list)') | |
| 182 return None | |
| 183 | |
| 184 indices = range(len(instances)) | |
| 185 if randomInitialization: | |
| 186 indices = np.random.permutation(indices) | |
| 187 else: | |
| 188 def compare(i, j): | |
| 189 if len(instances[i]) > len(instances[j]): | |
| 190 return -1 | |
| 191 elif len(instances[i]) == len(instances[j]): | |
| 192 return 0 | |
| 193 else: | |
| 194 return 1 | |
| 195 indices.sort(compare) | |
| 196 # go through all instances | |
| 197 if initialPrototypeIndices is None: | |
| 198 prototypeIndices = [indices[0]] | |
| 199 else: | |
| 200 prototypeIndices = initialPrototypeIndices | |
| 201 for i in indices[1:]: | |
| 202 if similarityFunc is not None: | |
| 203 for j in prototypeIndices: | |
| 204 if similarities[i][j] < 0: | |
| 205 similarities[i][j] = similarityFunc(instances[i], instances[j]) | |
| 206 similarities[j][i] = similarities[i][j] | |
| 207 if similarities[i][prototypeIndices].max() < minSimilarity: | |
| 208 prototypeIndices.append(i) | |
| 209 elif randomInitialization: # replace prototype by current instance i if longer | |
| 210 label = similarities[i][prototypeIndices].argmax() | |
| 211 if len(instances[prototypeIndices[label]]) < len(instances[i]): | |
| 212 prototypeIndices[label] = i | |
| 213 if assign: | |
| 214 return assignToPrototypeClusters(instances, prototypeIndices, similarities, minSimilarity, similarityFunc, minClusterSize) | |
| 215 else: | |
| 216 return prototypeIndices, None | |
| 210 | 217 |
| 211 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): | 218 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): |
| 212 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | 219 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} |
| 213 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) | 220 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) |
| 214 return clusterSizes | 221 return clusterSizes |
| 230 | 237 |
| 231 # Plot an ellipse to show the Gaussian component | 238 # Plot an ellipse to show the Gaussian component |
| 232 v, w = np.linalg.eigh(covariance) | 239 v, w = np.linalg.eigh(covariance) |
| 233 angle = np.arctan2(w[0][1], w[0][0]) | 240 angle = np.arctan2(w[0][1], w[0][0]) |
| 234 angle = 180*angle/np.pi # convert to degrees | 241 angle = 180*angle/np.pi # convert to degrees |
| 235 v *= 4 | 242 v *= 4 |
| 236 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i]) | 243 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i]) |
| 237 ell.set_clip_box(fig.bbox) | 244 ell.set_clip_box(fig.bbox) |
| 238 ell.set_alpha(alpha) | 245 ell.set_alpha(alpha) |
| 239 fig.axes[0].add_artist(ell) | 246 fig.axes[0].add_artist(ell) |
| 240 return labels | 247 return labels |
