Mercurial > hg > nsaunier > traffic-intelligence
annotate python/ml.py @ 478:d337bffd7283
Display of points in compute homography and step option to replay videos
A bug seems to remain with respect to trajectory bounds, to check
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Thu, 27 Mar 2014 11:40:28 -0400 |
| parents | 8bc632cb8344 |
| children | c81cbd6953fb |
| rev | line source |
|---|---|
|
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
1 #! /usr/bin/env python |
|
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
2 '''Libraries for machine learning algorithms''' |
|
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
3 |
|
308
8bafd054cda4
Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents:
184
diff
changeset
|
4 import numpy as np |
|
8bafd054cda4
Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents:
184
diff
changeset
|
5 |
|
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
6 __metaclass__ = type |
|
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
7 |
class Model(object):
    '''Base class delegating persistence to the wrapped self.model

    Subclasses are expected to set self.model to an object
    that provides load(filename) and save(filename)'''

    def load(self, fn):
        'Loads the wrapped model from the file named fn'
        wrapped = self.model
        wrapped.load(fn)

    def save(self, fn):
        'Saves the wrapped model to the file named fn'
        wrapped = self.model
        wrapped.save(fn)
| 15 | |
class SVM(Model):
    '''Wrapper for the OpenCV Support Vector Machine algorithm (cv2.SVM)

    The training parameters are stored in self.params at construction
    and passed to the OpenCV model when train is called'''

    def __init__(self, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
        # local import: cv2 is only required when an SVM is instantiated
        import cv2
        self.model = cv2.SVM()
        # NOTE(review): the OpenCV 2.4 Python samples pass the penalty under the key 'C';
        # confirm that the key 'Cvalue' is taken into account by cv2.SVM.train
        self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p)

    def train(self, samples, responses):
        '''Trains the model on samples and responses with the stored parameters'''
        self.model.train(samples, responses, params = self.params)

    def predict(self, sample):
        '''Returns the prediction of the model for sample, converted to numpy float32'''
        # the argument used to be referred to as the undefined name s (NameError)
        return np.float32(self.model.predict(sample))
| 29 | |
| 30 | |
|
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
class Centroid:
    '''Wrapper around instances to add a counter

    instance is the current average of the instances merged in the centroid
    and nInstances is their number
    The instances must provide multiply(scalar), returning the scaled instance,
    and support the + operator'''

    def __init__(self, instance, nInstances = 1):
        self.instance = instance
        self.nInstances = nInstances

    # def similar(instance2):
    #     return self.instance.similar(instance2)

    def add(self, instance2):
        '''Adds instance2 to the centroid and updates the average instance'''
        # weight the current average by its count before adding the new instance
        self.instance = self.instance.multiply(self.nInstances)+instance2
        self.nInstances += 1
        # float conversion avoids integer division
        self.instance = self.instance.multiply(1/float(self.nInstances))

    def average(self, c):
        '''Returns a new Centroid that is the average of self and the centroid c,
        weighted by their respective numbers of instances

        Neither self nor c is modified'''
        total = self.nInstances+c.nInstances
        inst = self.instance.multiply(self.nInstances)+c.instance.multiply(c.nInstances)
        # multiply returns the scaled instance (see add): the result must be kept
        inst = inst.multiply(1/float(total))
        return Centroid(inst, total)

    def draw(self, options = ''):
        '''Draws the instance and writes the number of instances next to its position'''
        from matplotlib.pylab import text
        self.instance.draw(options)
        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
|
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
55 |
def kMedoids(similarityMatrix, initialCentroids = None, k = None):
    '''Placeholder for a clustering algorithm working on a similarity matrix

    Either initialCentroids or k is meant to be provided
    Not implemented yet: the arguments are ignored and None is returned'''
    return None
|
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
60 |
|
382
ba813f148ade
development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
380
diff
changeset
|
def assignCluster(data, similarFunc, initialCentroids = [], shuffleData = True):
    '''Single-pass clustering algorithm based on a similarity function

    Each instance is added to the first centroid whose current instance is
    similar to it according to similarFunc(centroidInstance, instance);
    if there is none, the instance starts a new centroid.
    The number of clusters is therefore determined by the data and similarFunc.
    It is supposed that the average of instances can be computed (see Centroid.add)

    data: sequence of instances (not modified)
    similarFunc: function of two instances returning true if they are similar
    initialCentroids: centroids to start from (deep copied, not modified);
    if empty, the first instance (after shuffling) is the first centroid
    shuffleData: if true, the instances are processed in random order

    Returns the list of centroids (empty if there are neither data nor initial centroids)'''
    from random import shuffle
    from copy import deepcopy

    localdata = list(data) # new list to avoid modifying data, works for any sequence
    if shuffleData:
        shuffle(localdata)
    if initialCentroids:
        # initialCentroids is only read and deep copied, never modified
        centroids = deepcopy(initialCentroids)
        # all instances must be assigned: the first one does not seed a centroid here
        remaining = localdata
    elif localdata:
        centroids = [Centroid(localdata[0])]
        remaining = localdata[1:]
    else:
        return []
    for instance in remaining:
        for centroid in centroids:
            if similarFunc(centroid.instance, instance):
                centroid.add(instance)
                break
        else: # no similar centroid was found
            centroids.append(Centroid(instance))

    return centroids
|
308
8bafd054cda4
Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents:
184
diff
changeset
|
88 |
|
382
ba813f148ade
development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
380
diff
changeset
|
89 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements |
|
ba813f148ade
development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
380
diff
changeset
|
90 |
|
293
ee3302528cdc
rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
285
diff
changeset
|
def spectralClustering(similarityMatrix, k, iter=20):
    '''Spectral clustering of the elements described by a similarity matrix

    similarityMatrix: square matrix of pairwise similarities
    k: number of clusters requested from k-means
    iter: passed to scipy.cluster.vq.kmeans as its iter argument

    Returns the cluster index of each element (starting from 0)
    and the singular values of the Laplacian matrix

    NOTE(review): the features are the k singular vectors with the largest
    singular values of L = I - D^-1/2 S D^-1/2, whereas spectral clustering is usually
    described with the smallest eigenvalues of L; to confirm'''
    from scipy.cluster.vq import kmeans, whiten, vq

    n = len(similarityMatrix)
    # normalized Laplacian matrix built from the sums over the first axis
    degrees = np.sum(similarityMatrix, axis=0)
    invSqrtDegrees = np.diag(1 / np.sqrt(degrees))
    scaled = np.dot(similarityMatrix, invSqrtDegrees)
    laplacian = np.identity(n) - np.dot(invSqrtDegrees, scaled)
    # singular value decomposition of the Laplacian matrix
    _, sigma, rightVectors = np.linalg.svd(laplacian)
    # one row of features per element: the first k singular vectors stacked as columns
    features = whiten(np.transpose(np.array(rightVectors[:k])))
    # k-means on the features, then assignment of each element to its closest centroid
    centroids, _ = kmeans(features, k, iter)
    code, _ = vq(features, centroids) # code from 0 (first cluster) to k-1 (last cluster)
    return code, sigma
