nsaunier/traffic-intelligence: python/ml.py comparison

comparison python/ml.py @ 795:a34ec862371f

merged with dev branch

author	Nicolas Saunier <nicolas.saunier@polymtl.ca>
date	Mon, 09 May 2016 15:33:11 -0400
parents	1158a6e2d28e
children	180b6b0231c0

comparison

equal deleted inserted replaced

-:0a05883216cf
+:a34ec862371f
 #! /usr/bin/env python
 '''Libraries for machine learning algorithms'''
+from os import path
+from random import shuffle
+from copy import copy, deepcopy
 import numpy as np
+from matplotlib.pylab import text
+import matplotlib as mpl
-class Model(object):
+import matplotlib.pyplot as plt
-'''Abstract class for loading/saving model'''
+from scipy.cluster.vq import kmeans, whiten, vq
+from sklearn import mixture
+import cv2
+import utils
+#####################
+# OpenCV ML models
+#####################
class StatModel(object):
    '''Abstract class for loading/saving model
    Issues with OpenCV, does not seem to work

    Subclasses are expected to set self.model to an object
    providing load(filename) and save(filename)'''

    def load(self, filename):
        '''Loads the model from filename if the file exists,
        otherwise prints a message and leaves the model untouched'''
        if path.exists(filename):
            self.model.load(filename)
        else:
            print('Provided filename {} does not exist: model not loaded!'.format(filename))

    def save(self, filename):
        '''Saves the model in filename (delegates to self.model.save)'''
        self.model.save(filename)
class SVM(StatModel):
    '''wrapper for OpenCV Support Vector Machine algorithm (cv2.ml.SVM)'''

    def __init__(self, svmType = cv2.ml.SVM_C_SVC, kernelType = cv2.ml.SVM_RBF, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
        '''Creates the OpenCV SVM and forwards each argument
        to the corresponding setter of the model'''
        self.model = cv2.ml.SVM_create()
        self.model.setType(svmType)
        self.model.setKernel(kernelType)
        self.model.setDegree(degree)
        self.model.setGamma(gamma)
        self.model.setCoef0(coef0)
        self.model.setC(Cvalue)
        self.model.setNu(nu)
        self.model.setP(p)

    def load(self, filename):
        '''Replaces the model by the one stored in filename if the file exists,
        otherwise prints a message and keeps the current model'''
        if path.exists(filename):
            # SVM_load builds and returns a new model: it has to be kept,
            # otherwise the loaded model is discarded
            self.model = cv2.ml.SVM_load(filename)
        else:
            print('Provided filename {} does not exist: model not loaded!'.format(filename))

    def train(self, samples, layout, responses):
        '''Trains the model (arguments are passed unchanged to the OpenCV train method)

        layout is presumably cv2.ml.ROW_SAMPLE or cv2.ml.COL_SAMPLE (to be confirmed against the callers)'''
        self.model.train(samples, layout, responses)

    def predict(self, hog):
        '''Returns the result of the OpenCV predict method for the features hog'''
        return self.model.predict(hog)
+#####################
+# Clustering
+#####################
 class Centroid(object):
 'Wrapper around instances to add a counter'
 def __init__(self, instance, nInstances = 1):
 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances)
 inst.multiply(1/(self.nInstances+instance.nInstances))
 return Centroid(inst, self.nInstances+instance.nInstances)
 def plot(self, options = ''):
-from matplotlib.pylab import text
 self.instance.plot(options)
 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
 def kMedoids(similarityMatrix, initialCentroids = None, k = None):
 '''Algorithm that clusters any dataset based on a similarity matrix
 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function.
 The number of clusters will be determined accordingly
 data: list of instances
 averageCentroid: '''
-from random import shuffle
-from copy import copy, deepcopy
 localdata = copy(data) # shallow copy to avoid modifying data
 if shuffleData:
 shuffle(localdata)
 if initialCentroids is None:
 centroids = [Centroid(localdata[0])]
 	U,sigma,V = np.linalg.svd(L)
 	# create feature vector from k first eigenvectors
 	# by stacking eigenvectors as columns
 	features = np.array(V[:k]).T
 	# k-means
-	from scipy.cluster.vq import kmeans, whiten, vq
 	features = whiten(features)
 	centroids,distortion = kmeans(features,k, iter)
 	code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
 	return code,sigma
 return prototypeIndices, labels
 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
-clusterSizes['outlier'] = sum(np.array(labels) == -1)
+clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex)
 return clusterSizes
+# Gaussian Mixture Models
def plotGMMClusters(model, dataset = None, fig = None, colors = utils.colors, nPixelsPerUnit = 1., alpha = 0.3):
    '''plot the ellipse corresponding to the Gaussians
    and the predicted classes of the instances in the dataset

    model: fitted mixture model providing n_components, means_, covars_ and predict
    (presumably a sklearn.mixture model, to be confirmed against the callers)
    dataset: optional array of instances, one per row, of which the first two columns are plotted
    (if None, only the ellipses are drawn)
    fig: figure to draw in (a new figure is created if None)
    colors: colors indexed by component number
    nPixelsPerUnit: factor applied to the dataset, the means and the covariances
    alpha: transparency of the ellipses'''
    if fig is None:
        fig = plt.figure()
    # a figure without axes (eg just created) gets default axes,
    # so that everything is drawn in the same place
    if len(fig.axes) > 0:
        ax = fig.axes[0]
    else:
        ax = fig.add_subplot(111)
    # the classes can be predicted only if instances are provided
    if dataset is not None:
        dataset = np.asarray(dataset)
        labels = model.predict(dataset)
        tmpDataset = nPixelsPerUnit*dataset
    for i in range(model.n_components):
        mean = nPixelsPerUnit*model.means_[i]
        covariance = nPixelsPerUnit*model.covars_[i]
        if dataset is not None:
            ax.scatter(tmpDataset[labels == i, 0], tmpDataset[labels == i, 1], .8, color=colors[i])
        ax.annotate(str(i), xy=(mean[0]+1, mean[1]+1))
        # Plot an ellipse to show the Gaussian component
        v, w = np.linalg.eigh(covariance)
        angle = np.arctan2(w[0][1], w[0][0])
        angle = 180*angle/np.pi  # convert to degrees
        v *= 4
        # angle passed by keyword: recent matplotlib versions do not accept it positionally
        ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180+angle, color=colors[i])
        ell.set_clip_box(fig.bbox)
        ell.set_alpha(alpha)
        ax.add_artist(ell)

Mercurial > hg > nsaunier > traffic-intelligence

comparison python/ml.py @ 795:a34ec862371f