Mercurial > hg > nsaunier > traffic-intelligence
annotate python/ml.py @ 700:0f1b6907643a dev
merged improvement from stable
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Fri, 17 Jul 2015 14:33:01 -0400 |
| parents | da1352b89d02 |
| children | b02431a8234c |
| rev | line source |
|---|---|
|
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
1 #! /usr/bin/env python |
|
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
2 '''Libraries for machine learning algorithms''' |
|
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
3 |
|
308
8bafd054cda4
Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents:
184
diff
changeset
|
4 import numpy as np |
|
8bafd054cda4
Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents:
184
diff
changeset
|
5 |
|
183
ed944ff45e8c
first simple clustering algorithm implementation
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
diff
changeset
|
6 |
class Model(object):
    '''Base class giving load/save behaviour to model wrappers

    Subclasses are expected to set self.model to an object that
    provides load(filename) and save(filename) methods'''

    def load(self, filename):
        '''Loads the wrapped model from filename if that path exists

        When the path does not exist, a message is printed and
        the wrapped model is left untouched (no exception is raised)'''
        import os.path
        if not os.path.exists(filename):
            print('Provided filename {} does not exist: model not loaded!'.format(filename))
            return
        self.model.load(filename)

    def save(self, filename):
        '''Saves the wrapped model to filename (delegates to self.model.save)'''
        self.model.save(filename)
| 380 | 18 |
class SVM(Model):
    '''Wrapper for the OpenCV Support Vector Machine (cv2.SVM)

    Loading and saving are inherited from Model and act on self.model'''

    def __init__(self):
        # cv2 is imported here so that the module can be used without OpenCV
        # as long as no SVM is instantiated
        import cv2
        self.model = cv2.SVM()

    def train(self, samples, responses, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0):
        '''Trains the wrapped model on samples and responses

        The SVM settings are stored in self.params (a dictionary)
        and passed as is to the train method of the wrapped model'''
        self.params = {'svm_type': svm_type,
                       'kernel_type': kernel_type,
                       'degree': degree,
                       'gamma': gamma,
                       'coef0': coef0,
                       'Cvalue': Cvalue,
                       'nu': nu,
                       'p': p}
        self.model.train(samples, responses, params = self.params)

    def predict(self, hog):
        '''Returns the prediction of the wrapped model for hog
        (presumably a HOG feature vector, to be confirmed against callers)'''
        prediction = self.model.predict(hog)
        return prediction
| 380 | 32 |
| 33 | |
|
665
15e244d2a1b5
corrected bug with circular import for VideoFilenameAddable, moved to base module
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
636
diff
changeset
|
class Centroid(object):
    '''Wrapper around instances to add a counter

    The wrapped instance must provide multiply(scalar), returning a new instance,
    and support addition with another instance (+ operator)
    plot additionally requires instance.plot(options) and instance.position.x/y'''

    def __init__(self, instance, nInstances = 1):
        self.instance = instance
        self.nInstances = nInstances

    def add(self, instance2):
        '''Updates the centroid in place as the running mean
        of the instances seen so far and instance2'''
        self.instance = self.instance.multiply(self.nInstances)+instance2
        self.nInstances += 1
        self.instance = self.instance.multiply(1/float(self.nInstances))

    def average(self, c):
        '''Returns a new Centroid that is the average of self and the centroid c,
        weighted by their respective numbers of instances

        Neither self nor c is modified'''
        total = self.nInstances+c.nInstances
        inst = self.instance.multiply(self.nInstances)+c.instance.multiply(c.nInstances)
        # float division (1/total is 0 with Python 2 integers)
        # and the result of multiply must be kept, it is not an in-place operation in add
        inst = inst.multiply(1/float(total))
        return Centroid(inst, total)

    def plot(self, options = ''):
        '''Plots the wrapped instance and writes the number of instances next to its position'''
        from matplotlib.pylab import text
        self.instance.plot(options)
        text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances))
|
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
58 |
def kMedoids(similarityMatrix, initialCentroids = None, k = None):
    '''Placeholder for a k-medoids algorithm clustering any dataset
    described by a similarity matrix

    Either initialCentroids or k is meant to be provided
    TODO not implemented: does nothing and returns None'''
    return None
|
184
d70e9b36889c
initial work on flow vectors and clustering algorithms
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
183
diff
changeset
|
63 |
|
526
21bdeb29f855
corrected bug in initialization of lists and loading trajectories from vissim files
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
515
diff
changeset
|
def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True):
    '''Incremental (single pass) clustering with a similarity function

    Each instance is added to the first centroid for which
    similarFunc(centroid.instance, instance) returns true;
    if there is none, the instance starts a new cluster.
    It is supposed that the average of instances can be computed
    (see Centroid.add: multiply and + on the instances).
    The number of clusters is determined accordingly

    data: list of instances (not modified, a shallow copy is shuffled)
    similarFunc: function of two instances returning true if they belong to the same cluster
    initialCentroids: optional list of Centroid objects (deep copied, not modified)
    shuffleData: if true, the instances are processed in random order

    Returns the list of centroids (empty if there is no data and no initial centroid)'''
    from random import shuffle
    from copy import copy, deepcopy
    localdata = copy(data) # shallow copy to avoid modifying data
    if shuffleData:
        shuffle(localdata)
    if initialCentroids is None:
        if len(localdata) == 0: # nothing to cluster
            return []
        # the first instance seeds the first cluster, only the rest remains to be assigned
        centroids = [Centroid(localdata[0])]
        remaining = localdata[1:]
    else:
        # all instances must be assigned when the centroids are provided
        # (skipping the first one would silently drop it)
        centroids = deepcopy(initialCentroids)
        remaining = localdata
    for instance in remaining:
        for centroid in centroids:
            if similarFunc(centroid.instance, instance):
                centroid.add(instance)
                break
        else: # no similar centroid found
            centroids.append(Centroid(instance))

    return centroids
|
308
8bafd054cda4
Added a function to compute LCSS distance between two indcators
Mohamed Gomaa
parents:
184
diff
changeset
|
91 |
|
382
ba813f148ade
development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
380
diff
changeset
|
92 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements |
|
ba813f148ade
development for clustering
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
380
diff
changeset
|
93 |
|
293
ee3302528cdc
rearranged new code by Paul (works now)
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
285
diff
changeset
|
def spectralClustering(similarityMatrix, k, iter=20):
    '''Spectral clustering of the elements described by a similarity matrix

    similarityMatrix: square matrix (n x n)
    k: number of clusters
    iter: passed to scipy.cluster.vq.kmeans as its third argument

    Returns the cluster index of each element (from 0 to k-1)
    and the singular values of the normalized Laplacian matrix'''
    from scipy.cluster.vq import kmeans, whiten, vq
    nElements = len(similarityMatrix)
    # normalized Laplacian matrix: I - D^(-1/2) S D^(-1/2)
    degrees = np.sum(similarityMatrix,axis=0)
    invSqrtDegrees = np.diag(1 / np.sqrt(degrees))
    normalizedSimilarity = np.dot(invSqrtDegrees,np.dot(similarityMatrix,invSqrtDegrees))
    laplacian = np.identity(nElements) - normalizedSimilarity
    # decomposition of the Laplacian matrix
    # NOTE(review): the rows of V[:k] are the first k right singular vectors
    # returned by svd; confirm that these are the intended eigenvectors
    U,sigma,V = np.linalg.svd(laplacian)
    # one feature vector per element: the k vectors are stacked as columns
    features = whiten(np.array(V[:k]).T)
    # k-means on the feature vectors
    centroids,distortion = kmeans(features,k, iter)
    # code starting from 0 (represent first cluster) to k-1 (last cluster)
    code,distance = vq(features,centroids)
    return code,sigma
|
563
39de5c532559
place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
526
diff
changeset
|
113 |
|
39de5c532559
place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
526
diff
changeset
|
def motionPatterLearning(objects, maxDistance):
    '''Placeholder for the learning of motion patterns

    Planned option: use only the (n?) longest features per object
    instead of all of them, for speed up

    TODO not implemented: does nothing and returns None'''
    return None
|
39de5c532559
place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
526
diff
changeset
|
119 |
|
39de5c532559
place holder functions
Nicolas Saunier <nicolas.saunier@polymtl.ca>
parents:
526
diff
changeset
|
def prototypeCluster():
    '''Placeholder for prototype-based clustering

    TODO not implemented: does nothing and returns None'''
    return None
