comparison python/ml.py @ 795:a34ec862371f

merged with dev branch
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 09 May 2016 15:33:11 -0400
parents 1158a6e2d28e
children 180b6b0231c0
comparison
equal deleted inserted replaced
758:0a05883216cf 795:a34ec862371f
1 #! /usr/bin/env python 1 #! /usr/bin/env python
2 '''Libraries for machine learning algorithms''' 2 '''Libraries for machine learning algorithms'''
3 3
4 from os import path
5 from random import shuffle
6 from copy import copy, deepcopy
7
4 import numpy as np 8 import numpy as np
5 9 from matplotlib.pylab import text
6 10 import matplotlib as mpl
7 class Model(object): 11 import matplotlib.pyplot as plt
8 '''Abstract class for loading/saving model''' 12 from scipy.cluster.vq import kmeans, whiten, vq
13 from sklearn import mixture
14 import cv2
15
16 import utils
17
18 #####################
19 # OpenCV ML models
20 #####################
21
22 class StatModel(object):
23 '''Abstract class for loading/saving model
24
25 Issues with OpenCV, does not seem to work'''
9 def load(self, filename): 26 def load(self, filename):
10 from os import path
11 if path.exists(filename): 27 if path.exists(filename):
12 self.model.load(filename) 28 self.model.load(filename)
13 else: 29 else:
14 print('Provided filename {} does not exist: model not loaded!'.format(filename)) 30 print('Provided filename {} does not exist: model not loaded!'.format(filename))
15 31
16 def save(self, filename): 32 def save(self, filename):
17 self.model.save(filename) 33 self.model.save(filename)
18 34
19 class SVM(Model): 35 class SVM(StatModel):
20 '''wrapper for OpenCV Support Vector Machine (SVM) algorithm''' 36 '''wrapper for OpenCV Support Vector Machine (SVM) algorithm'''
21 37 def __init__(self, svmType = cv2.ml.SVM_C_SVC, kernelType = cv2.ml.SVM_RBF, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
22 def __init__(self): 38 self.model = cv2.ml.SVM_create()
23 import cv2 39 self.model.setType(svmType)
24 self.model = cv2.SVM() 40 self.model.setKernel(kernelType)
25 41 self.model.setDegree(degree)
26 def train(self, samples, responses, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0): 42 self.model.setGamma(gamma)
27 self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p) 43 self.model.setCoef0(coef0)
28 self.model.train(samples, responses, params = self.params) 44 self.model.setC(Cvalue)
45 self.model.setNu(nu)
46 self.model.setP(p)
47
48 def load(self, filename):
49 if path.exists(filename):
50 cv2.ml.SVM_load(filename)
51 else:
52 print('Provided filename {} does not exist: model not loaded!'.format(filename))
53
54 def train(self, samples, layout, responses):
55 self.model.train(samples, layout, responses)
29 56
30 def predict(self, hog): 57 def predict(self, hog):
31 return self.model.predict(hog) 58 return self.model.predict(hog)
32 59
60
61 #####################
62 # Clustering
63 #####################
33 64
34 class Centroid(object): 65 class Centroid(object):
35 'Wrapper around instances to add a counter' 66 'Wrapper around instances to add a counter'
36 67
37 def __init__(self, instance, nInstances = 1): 68 def __init__(self, instance, nInstances = 1):
50 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) 81 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances)
51 inst.multiply(1/(self.nInstances+instance.nInstances)) 82 inst.multiply(1/(self.nInstances+instance.nInstances))
52 return Centroid(inst, self.nInstances+instance.nInstances) 83 return Centroid(inst, self.nInstances+instance.nInstances)
53 84
54 def plot(self, options = ''): 85 def plot(self, options = ''):
55 from matplotlib.pylab import text
56 self.instance.plot(options) 86 self.instance.plot(options)
57 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) 87 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
58 88
59 def kMedoids(similarityMatrix, initialCentroids = None, k = None): 89 def kMedoids(similarityMatrix, initialCentroids = None, k = None):
60 '''Algorithm that clusters any dataset based on a similarity matrix 90 '''Algorithm that clusters any dataset based on a similarity matrix
66 Two instances should be in the same cluster if the sameCluster function returns true for them. It is assumed that the average centroid of a set of instances can be computed using the supplied function. 96 Two instances should be in the same cluster if the sameCluster function returns true for them. It is assumed that the average centroid of a set of instances can be computed using the supplied function.
67 The number of clusters will be determined accordingly 97 The number of clusters will be determined accordingly
68 98
69 data: list of instances 99 data: list of instances
70 averageCentroid: ''' 100 averageCentroid: '''
71
72 from random import shuffle
73 from copy import copy, deepcopy
74 localdata = copy(data) # shallow copy to avoid modifying data 101 localdata = copy(data) # shallow copy to avoid modifying data
75 if shuffleData: 102 if shuffleData:
76 shuffle(localdata) 103 shuffle(localdata)
77 if initialCentroids is None: 104 if initialCentroids is None:
78 centroids = [Centroid(localdata[0])] 105 centroids = [Centroid(localdata[0])]
103 U,sigma,V = np.linalg.svd(L) 130 U,sigma,V = np.linalg.svd(L)
104 # create feature vector from k first eigenvectors 131 # create feature vector from k first eigenvectors
105 # by stacking eigenvectors as columns 132 # by stacking eigenvectors as columns
106 features = np.array(V[:k]).T 133 features = np.array(V[:k]).T
107 # k-means 134 # k-means
108 from scipy.cluster.vq import kmeans, whiten, vq
109 features = whiten(features) 135 features = whiten(features)
110 centroids,distortion = kmeans(features,k, iter) 136 centroids,distortion = kmeans(features,k, iter)
111 code,distance = vq(features,centroids) # codes range from 0 (first cluster) to k-1 (last cluster) 137 code,distance = vq(features,centroids) # codes range from 0 (first cluster) to k-1 (last cluster)
112 return code,sigma 138 return code,sigma
113 139
177 203
178 return prototypeIndices, labels 204 return prototypeIndices, labels
179 205
180 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1): 206 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
181 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices} 207 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
182 clusterSizes['outlier'] = sum(np.array(labels) == -1) 208 clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex)
183 return clusterSizes 209 return clusterSizes
210
211 # Gaussian Mixture Models
212 def plotGMMClusters(model, dataset = None, fig = None, colors = utils.colors, nPixelsPerUnit = 1., alpha = 0.3):
213 '''plot the ellipse corresponding to the Gaussians
214 and the predicted classes of the instances in the dataset'''
215 if fig is None:
216 fig = plt.figure()
217 labels = model.predict(dataset)
218 tmpDataset = nPixelsPerUnit*dataset
219 for i in xrange(model.n_components):
220 mean = nPixelsPerUnit*model.means_[i]
221 covariance = nPixelsPerUnit*model.covars_[i]
222 if dataset is not None:
223 plt.scatter(tmpDataset[labels == i, 0], tmpDataset[labels == i, 1], .8, color=colors[i])
224 plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1))
225
226 # Plot an ellipse to show the Gaussian component
227 v, w = np.linalg.eigh(covariance)
228 angle = np.arctan2(w[0][1], w[0][0])
229 angle = 180*angle/np.pi # convert to degrees
230 v *= 4
231 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180+angle, color=colors[i])
232 ell.set_clip_box(fig.bbox)
233 ell.set_alpha(alpha)
234 fig.axes[0].add_artist(ell)