Mercurial > hg > nsaunier > traffic-intelligence
diff python/ml.py @ 795:a34ec862371f
merged with dev branch
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Mon, 09 May 2016 15:33:11 -0400 |
| parents | 1158a6e2d28e |
| children | 180b6b0231c0 |
line wrap: on
line diff
--- a/python/ml.py Tue Nov 03 13:48:56 2015 -0500 +++ b/python/ml.py Mon May 09 15:33:11 2016 -0400 @@ -1,13 +1,29 @@ #! /usr/bin/env python '''Libraries for machine learning algorithms''' +from os import path +from random import shuffle +from copy import copy, deepcopy + import numpy as np - +from matplotlib.pylab import text +import matplotlib as mpl +import matplotlib.pyplot as plt +from scipy.cluster.vq import kmeans, whiten, vq +from sklearn import mixture +import cv2 -class Model(object): - '''Abstract class for loading/saving model''' +import utils + +##################### +# OpenCV ML models +##################### + +class StatModel(object): + '''Abstract class for loading/saving model + + Issues with OpenCV, does not seem to work''' def load(self, filename): - from os import path if path.exists(filename): self.model.load(filename) else: @@ -16,21 +32,36 @@ def save(self, filename): self.model.save(filename) -class SVM(Model): +class SVM(StatModel): '''wrapper for OpenCV SimpleVectorMachine algorithm''' + def __init__(self, svmType = cv2.ml.SVM_C_SVC, kernelType = cv2.ml.SVM_RBF, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0): + self.model = cv2.ml.SVM_create() + self.model.setType(svmType) + self.model.setKernel(kernelType) + self.model.setDegree(degree) + self.model.setGamma(gamma) + self.model.setCoef0(coef0) + self.model.setC(Cvalue) + self.model.setNu(nu) + self.model.setP(p) - def __init__(self): - import cv2 - self.model = cv2.SVM() + def load(self, filename): + if path.exists(filename): + cv2.ml.SVM_load(filename) + else: + print('Provided filename {} does not exist: model not loaded!'.format(filename)) - def train(self, samples, responses, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0): - self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p) - self.model.train(samples, responses, params = self.params) + def train(self, samples, layout, responses): + self.model.train(samples, layout, responses) def predict(self, hog): return self.model.predict(hog) +##################### +# Clustering +##################### + class Centroid(object): 'Wrapper around instances to add a counter' @@ -52,7 +83,6 @@ return Centroid(inst, self.nInstances+instance.nInstances) def plot(self, options = ''): - from matplotlib.pylab import text self.instance.plot(options) text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) @@ -68,9 +98,6 @@ data: list of instances averageCentroid: ''' - - from random import shuffle - from copy import copy, deepcopy localdata = copy(data) # shallow copy to avoid modifying data if shuffleData: shuffle(localdata) @@ -105,7 +132,6 @@ # by stacking eigenvectors as columns features = np.array(V[:k]).T # k-means - from scipy.cluster.vq import kmeans, whiten, vq features = whiten(features) centroids,distortion = kmeans(features,k, iter) code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) @@ -179,5 +205,30 @@ def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} - clusterSizes['outlier'] = sum(np.array(labels) == -1) + clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) return clusterSizes + +# Gaussian Mixture Models +def plotGMMClusters(model, dataset = None, fig = None, colors = utils.colors, nPixelsPerUnit = 1., alpha = 0.3): + '''plot the ellipse corresponding to the Gaussians + and the predicted classes of the instances in the dataset''' + if fig is None: + fig = plt.figure() + labels = model.predict(dataset) + tmpDataset = nPixelsPerUnit*dataset + for i in xrange(model.n_components): + mean = nPixelsPerUnit*model.means_[i] + covariance = nPixelsPerUnit*model.covars_[i] + if dataset is not None: + plt.scatter(tmpDataset[labels == i, 0], tmpDataset[labels == i, 1], .8, color=colors[i]) + plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1)) + + # Plot an ellipse to show the Gaussian component + v, w = np.linalg.eigh(covariance) + angle = np.arctan2(w[0][1], w[0][0]) + angle = 180*angle/np.pi # convert to degrees + v *= 4 + ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i]) + ell.set_clip_box(fig.bbox) + ell.set_alpha(alpha) + fig.axes[0].add_artist(ell)
