diff python/ml.py @ 795:a34ec862371f

merged with dev branch
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Mon, 09 May 2016 15:33:11 -0400
parents 1158a6e2d28e
children 180b6b0231c0
line wrap: on
line diff
--- a/python/ml.py	Tue Nov 03 13:48:56 2015 -0500
+++ b/python/ml.py	Mon May 09 15:33:11 2016 -0400
@@ -1,13 +1,29 @@
 #! /usr/bin/env python
 '''Libraries for machine learning algorithms'''
 
+from os import path
+from random import shuffle
+from copy import copy, deepcopy
+
 import numpy as np
-
+from matplotlib.pylab import text
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+from scipy.cluster.vq import kmeans, whiten, vq
+from sklearn import mixture
+import cv2
 
-class Model(object):
-    '''Abstract class for loading/saving model'''    
+import utils
+
+#####################
+# OpenCV ML models
+#####################
+
+class StatModel(object):
+    '''Abstract class for loading/saving model
+
+    Issues with OpenCV, does not seem to work'''    
     def load(self, filename):
-        from os import path
         if path.exists(filename):
             self.model.load(filename)
         else:
@@ -16,21 +32,36 @@
     def save(self, filename):
         self.model.save(filename)
 
-class SVM(Model):
+class SVM(StatModel):
     '''wrapper for OpenCV SimpleVectorMachine algorithm'''
+    def __init__(self, svmType = cv2.ml.SVM_C_SVC, kernelType = cv2.ml.SVM_RBF, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
+        self.model = cv2.ml.SVM_create() # new OpenCV SVM instance, configured by the setters below
+        self.model.setType(svmType) # SVM formulation, C-SVC by default
+        self.model.setKernel(kernelType) # kernel, RBF by default
+        self.model.setDegree(degree) # the remaining arguments are forwarded unchanged to the matching OpenCV setters
+        self.model.setGamma(gamma)
+        self.model.setCoef0(coef0)
+        self.model.setC(Cvalue)
+        self.model.setNu(nu)
+        self.model.setP(p)
 
-    def __init__(self):
-        import cv2
-        self.model = cv2.SVM()
+    def load(self, filename): # replaces self.model by the SVM stored in filename, if that file exists
+        if path.exists(filename):
+            self.model = cv2.ml.SVM_load(filename) # SVM_load returns the loaded model: keep it (it used to be discarded)
+        else:
+            print('Provided filename {} does not exist: model not loaded!'.format(filename))
 
-    def train(self, samples, responses, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
-        self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p)
-        self.model.train(samples, responses, params = self.params)
+    def train(self, samples, layout, responses): # thin wrapper: the three arguments are forwarded unchanged to the OpenCV model
+        self.model.train(samples, layout, responses) # layout is presumably a cv2.ml sample layout flag (e.g. cv2.ml.ROW_SAMPLE) - to confirm
 
     def predict(self, hog):
         return self.model.predict(hog)
 
 
+#####################
+# Clustering
+#####################
+
 class Centroid(object):
     'Wrapper around instances to add a counter'
 
@@ -52,7 +83,6 @@
         return Centroid(inst, self.nInstances+instance.nInstances)
 
     def plot(self, options = ''):
-        from matplotlib.pylab import text
         self.instance.plot(options)
         text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
 
@@ -68,9 +98,6 @@
 
     data: list of instances
     averageCentroid: '''
-
-    from random import shuffle
-    from copy import copy, deepcopy
     localdata = copy(data) # shallow copy to avoid modifying data
     if shuffleData:
         shuffle(localdata)
@@ -105,7 +132,6 @@
 	# by stacking eigenvectors as columns
 	features = np.array(V[:k]).T
 	# k-means
-	from scipy.cluster.vq import kmeans, whiten, vq
 	features = whiten(features)
 	centroids,distortion = kmeans(features,k, iter)
 	code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
@@ -179,5 +205,30 @@
 
 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
     clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
-    clusterSizes['outlier'] = sum(np.array(labels) == -1)
+    clusterSizes['outlier'] = sum(np.array(labels) == outlierIndex) # labels equal to outlierIndex are counted under the 'outlier' key
     return clusterSizes
+
+# Gaussian Mixture Models
+def plotGMMClusters(model, dataset = None, fig = None, colors = utils.colors, nPixelsPerUnit = 1., alpha = 0.3):
+    '''Plots one ellipse per Gaussian component of model (means and covariances multiplied by nPixelsPerUnit)
+    and, if dataset is provided, its instances colored by predicted component; a new figure is created if fig is None'''
+    if fig is None:
+        fig = plt.figure()
+    if dataset is not None: # predict and scale only if there is data: the default dataset = None used to fail here
+        labels = model.predict(dataset)
+        tmpDataset = nPixelsPerUnit*dataset
+    for i in range(model.n_components):
+        mean = nPixelsPerUnit*model.means_[i]
+        covariance = nPixelsPerUnit*model.covars_[i]
+        if dataset is not None:
+            plt.scatter(tmpDataset[labels == i, 0], tmpDataset[labels == i, 1], .8, color=colors[i])
+        plt.annotate(str(i), xy=(mean[0]+1, mean[1]+1))
+
+        # Plot an ellipse to show the Gaussian component
+        v, w = np.linalg.eigh(covariance)
+        angle = 180*np.arctan2(w[0][1], w[0][0])/np.pi # orientation taken from w[0], converted to degrees
+        v *= 4 # scale the eigenvalues used as ellipse axis lengths
+        ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180+angle, color=colors[i]) # angle by keyword: positional use is rejected by recent matplotlib
+        ell.set_clip_box(fig.bbox)
+        ell.set_alpha(alpha)
+        fig.axes[0].add_artist(ell)