Mercurial > hg > nsaunier > traffic-intelligence
comparison python/ml.py @ 795:a34ec862371f
merged with dev branch
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Mon, 09 May 2016 15:33:11 -0400 |
| parents | 1158a6e2d28e |
| children | 180b6b0231c0 |
comparison
equal
deleted
inserted
replaced
| 758:0a05883216cf | 795:a34ec862371f |
|---|---|
| 1 #! /usr/bin/env python | 1 #! /usr/bin/env python |
| 2 '''Libraries for machine learning algorithms''' | 2 '''Libraries for machine learning algorithms''' |
| 3 | 3 |
| 4 from os import path | |
| 5 from random import shuffle | |
| 6 from copy import copy, deepcopy | |
| 7 | |
| 4 import numpy as np | 8 import numpy as np |
| 5 | 9 from matplotlib.pylab import text |
| 6 | 10 import matplotlib as mpl |
| 7 class Model(object): | 11 import matplotlib.pyplot as plt |
| 8 '''Abstract class for loading/saving model''' | 12 from scipy.cluster.vq import kmeans, whiten, vq |
| 13 from sklearn import mixture | |
| 14 import cv2 | |
| 15 | |
| 16 import utils | |
| 17 | |
| 18 ##################### | |
| 19 # OpenCV ML models | |
| 20 ##################### | |
| 21 | |
| 22 class StatModel(object): | |
| 23 '''Abstract class for loading/saving model | |
| 24 | |
| 25 Issues with OpenCV, does not seem to work''' | |
| 9 def load(self, filename): | 26 def load(self, filename): |
| 10 from os import path | |
| 11 if path.exists(filename): | 27 if path.exists(filename): |
| 12 self.model.load(filename) | 28 self.model.load(filename) |
| 13 else: | 29 else: |
| 14 print('Provided filename {} does not exist: model not loaded!'.format(filename)) | 30 print('Provided filename {} does not exist: model not loaded!'.format(filename)) |
| 15 | 31 |
| 16 def save(self, filename): | 32 def save(self, filename): |
| 17 self.model.save(filename) | 33 self.model.save(filename) |
| 18 | 34 |
| 19 class SVM(Model): | 35 class SVM(StatModel): |
| 20 '''wrapper for OpenCV SimpleVectorMachine algorithm''' | 36 '''wrapper for OpenCV SimpleVectorMachine algorithm''' |
| 21 | 37 def __init__(self, svmType = cv2.ml.SVM_C_SVC, kernelType = cv2.ml.SVM_RBF, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0): |
| 22 def __init__(self): | 38 self.model = cv2.ml.SVM_create() |
| 23 import cv2 | 39 self.model.setType(svmType) |
| 24 self.model = cv2.SVM() | 40 self.model.setKernel(kernelType) |
| 25 | 41 self.model.setDegree(degree) |
| 26 def train(self, samples, responses, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0): | 42 self.model.setGamma(gamma) |
| 27 self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p) | 43 self.model.setCoef0(coef0) |
| 28 self.model.train(samples, responses, params = self.params) | 44 self.model.setC(Cvalue) |
| 45 self.model.setNu(nu) | |
| 46 self.model.setP(p) | |
| 47 | |
| 48 def load(self, filename): | |
| 49 if path.exists(filename): | |
| 50 cv2.ml.SVM_load(filename) | |
| 51 else: | |
| 52 print('Provided filename {} does not exist: model not loaded!'.format(filename)) | |
| 53 | |
| 54 def train(self, samples, layout, responses): | |
| 55 self.model.train(samples, layout, responses) | |
| 29 | 56 |
| 30 def predict(self, hog): | 57 def predict(self, hog): |
| 31 return self.model.predict(hog) | 58 return self.model.predict(hog) |
| 32 | 59 |
| 60 | |
| 61 ##################### | |
| 62 # Clustering | |
| 63 ##################### | |
| 33 | 64 |
| 34 class Centroid(object): | 65 class Centroid(object): |
| 35 'Wrapper around instances to add a counter' | 66 'Wrapper around instances to add a counter' |
| 36 | 67 |
| 37 def __init__(self, instance, nInstances = 1): | 68 def __init__(self, instance, nInstances = 1): |
| 50 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) | 81 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) |
| 51 inst.multiply(1/(self.nInstances+instance.nInstances)) | 82 inst.multiply(1/(self.nInstances+instance.nInstances)) |
| 52 return Centroid(inst, self.nInstances+instance.nInstances) | 83 return Centroid(inst, self.nInstances+instance.nInstances) |
| 53 | 84 |
| 54 def plot(self, options = ''): | 85 def plot(self, options = ''): |
| 55 from matplotlib.pylab import text | |
| 56 self.instance.plot(options) | 86 self.instance.plot(options) |
| 57 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) | 87 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) |
| 58 | 88 |
| 59 def kMedoids(similarityMatrix, initialCentroids = None, k = None): | 89 def kMedoids(similarityMatrix, initialCentroids = None, k = None): |
| 60 '''Algorithm that clusters any dataset based on a similarity matrix | 90 '''Algorithm that clusters any dataset based on a similarity matrix |
| 66 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. | 96 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. |
| 67 The number of clusters will be determined accordingly | 97 The number of clusters will be determined accordingly |
| 68 | 98 |
| 69 data: list of instances | 99 data: list of instances |
| 70 averageCentroid: ''' | 100 averageCentroid: ''' |
| 71 | |
| 72 from random import shuffle | |
| 73 from copy import copy, deepcopy | |
| 74 localdata = copy(data) # shallow copy to avoid modifying data | 101 localdata = copy(data) # shallow copy to avoid modifying data |
| 75 if shuffleData: | 102 if shuffleData: |
| 76 shuffle(localdata) | 103 shuffle(localdata) |
| 77 if initialCentroids is None: | 104 if initialCentroids is None: |
| 78 centroids = [Centroid(localdata[0])] | 105 centroids = [Centroid(localdata[0])] |
| 103 U,sigma,V = np.linalg.svd(L) | 130 U,sigma,V = np.linalg.svd(L) |
| 104 # create feature vector from k first eigenvectors | 131 # create feature vector from k first eigenvectors |
| 105 # by stacking eigenvectors as columns | 132 # by stacking eigenvectors as columns |
| 106 features = np.array(V[:k]).T | 133 features = np.array(V[:k]).T |
| 107 # k-means | 134 # k-means |
| 108 from scipy.cluster.vq import kmeans, whiten, vq | |
| 109 features = whiten(features) | 135 features = whiten(features) |
| 110 centroids,distortion = kmeans(features,k, iter) | 136 centroids,distortion = kmeans(features,k, iter) |
| 111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) | 137 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) |
| 112 return code,sigma | 138 return code,sigma |
| 113 | 139 |
| 177 | 203 |
| 178 return prototypeIndices, labels | 204 return prototypeIndices, labels |
| 179 | 205 |
| 180 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): | 206 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): |
| 181 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} | 207 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} |
| 182 clusterSizes['outlier'] = sum(np.array(labels) == -1) | 208 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) |
| 183 return clusterSizes | 209 return clusterSizes |
| 210 | |
| 211 # Gaussian Mixture Models | |
| 212 def plotGMMClusters(model, dataset = None, fig = None, colors = utils.colors, nPixelsPerUnit = 1., alpha = 0.3): | |
| 213 '''plot the ellipse corresponding to the Gaussians | |
| 214 and the predicted classes of the instances in the dataset''' | |
| 215 if fig is None: | |
| 216 fig = plt.figure() | |
| 217 labels = model.predict(dataset) | |
| 218 tmpDataset = nPixelsPerUnit*dataset | |
| 219 for i in xrange(model.n_components): | |
| 220 mean = nPixelsPerUnit*model.means_[i] | |
| 221 covariance = nPixelsPerUnit*model.covars_[i] | |
| 222 if dataset is not None: | |
| 223 plt.scatter(tmpDataset[labels == i, 0], tmpDataset[labels == i, 1], .8, color=colors[i]) | |
| 224 plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1)) | |
| 225 | |
| 226 # Plot an ellipse to show the Gaussian component | |
| 227 v, w = np.linalg.eigh(covariance) | |
| 228 angle = np.arctan2(w[0][1], w[0][0]) | |
| 229 angle = 180*angle/np.pi # convert to degrees | |
| 230 v *= 4 | |
| 231 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i]) | |
| 232 ell.set_clip_box(fig.bbox) | |
| 233 ell.set_alpha(alpha) | |
| 234 fig.axes[0].add_artist(ell) |
