Mercurial > hg > nsaunier > traffic-intelligence
comparison python/ml.py @ 614:5e09583275a4
Merged Nicolas/trafficintelligence into default
| author | Mohamed Gomaa <eng.m.gom3a@gmail.com> |
|---|---|
| date | Fri, 05 Dec 2014 12:13:53 -0500 |
| parents | 39de5c532559 |
| children | 3058e00887bc |
Comparison legend:
- equal
- deleted
- inserted
- replaced
| 598:11f96bd08552 | 614:5e09583275a4 |
|---|---|
| 2 '''Libraries for machine learning algorithms''' | 2 '''Libraries for machine learning algorithms''' |
| 3 | 3 |
| 4 import numpy as np | 4 import numpy as np |
| 5 | 5 |
| 6 __metaclass__ = type | 6 __metaclass__ = type |
| 7 | |
| 8 class Model(object): | |
| 9 '''Abstract class for loading/saving model''' | |
| 10 def load(self, fn): | |
| 11 self.model.load(fn) | |
| 12 | |
| 13 def save(self, fn): | |
| 14 self.model.save(fn) | |
| 15 | |
| 16 class SVM(Model): | |
| 17 '''wrapper for OpenCV SimpleVectorMachine algorithm''' | |
| 18 | |
| 19 def __init__(self, svm_type, kernel_type, degree = 0, gamma = 1, coef0 = 0, Cvalue = 1, nu = 0, p = 0): | |
| 20 import cv2 | |
| 21 self.model = cv2.SVM() | |
| 22 self.params = dict(svm_type = svm_type, kernel_type = kernel_type, degree = degree, gamma = gamma, coef0 = coef0, Cvalue = Cvalue, nu = nu, p = p) | |
| 23 | |
| 24 def train(self, samples, responses): | |
| 25 self.model.train(samples, responses, params = self.params) | |
| 26 | |
| 27 def predict(self, samples): | |
| 28 return np.float32([self.model.predict(s) for s in samples]) | |
| 29 | |
| 7 | 30 |
| 8 class Centroid: | 31 class Centroid: |
| 9 'Wrapper around instances to add a counter' | 32 'Wrapper around instances to add a counter' |
| 10 | 33 |
| 11 def __init__(self, instance, nInstances = 1): | 34 def __init__(self, instance, nInstances = 1): |
| 23 def average(c): | 46 def average(c): |
| 24 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) | 47 inst = self.instance.multiply(self.nInstances)+c.instance.multiply(instance.nInstances) |
| 25 inst.multiply(1/(self.nInstances+instance.nInstances)) | 48 inst.multiply(1/(self.nInstances+instance.nInstances)) |
| 26 return Centroid(inst, self.nInstances+instance.nInstances) | 49 return Centroid(inst, self.nInstances+instance.nInstances) |
| 27 | 50 |
| 28 def draw(self, options = ''): | 51 def plot(self, options = ''): |
| 29 from matplotlib.pylab import text | 52 from matplotlib.pylab import text |
| 30 self.instance.draw(options) | 53 self.instance.plot(options) |
| 31 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) | 54 text(self.instance.position.x+1, self.instance.position.y+1, str(self.nInstances)) |
| 32 | 55 |
| 56 def kMedoids(similarityMatrix, initialCentroids = None, k = None): | |
| 57 '''Algorithm that clusters any dataset based on a similarity matrix | |
| 58 Either the initialCentroids or k are passed''' | |
| 59 pass | |
| 33 | 60 |
| 34 def clustering(data, similar, initialCentroids = []): | 61 def assignCluster(data, similarFunc, initialCentroids = None, shuffleData = True): |
| 35 '''k-means algorithm with similarity function | 62 '''k-means algorithm with similarity function |
| 36 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. | 63 Two instances should be in the same cluster if the sameCluster function returns true for two instances. It is supposed that the average centroid of a set of instances can be computed, using the function. |
| 37 The number of clusters will be determined accordingly | 64 The number of clusters will be determined accordingly |
| 38 | 65 |
| 39 data: list of instances | 66 data: list of instances |
| 40 averageCentroid: ''' | 67 averageCentroid: ''' |
| 41 | 68 |
| 42 from random import shuffle | 69 from random import shuffle |
| 43 from copy import copy, deepcopy | 70 from copy import copy, deepcopy |
| 44 localdata = copy(data) # shallow copy to avoid modifying data | 71 localdata = copy(data) # shallow copy to avoid modifying data |
| 45 shuffle(localdata) | 72 if shuffleData: |
| 46 if initialCentroids: | 73 shuffle(localdata) |
| 74 if initialCentroids == None: | |
| 75 centroids = [Centroid(localdata[0])] | |
| 76 else: | |
| 47 centroids = deepcopy(initialCentroids) | 77 centroids = deepcopy(initialCentroids) |
| 48 else: | |
| 49 centroids = [Centroid(localdata[0])] | |
| 50 for instance in localdata[1:]: | 78 for instance in localdata[1:]: |
| 51 i = 0 | 79 i = 0 |
| 52 while i<len(centroids) and not similar(centroids[i].instance, instance): | 80 while i<len(centroids) and not similarFunc(centroids[i].instance, instance): |
| 53 i += 1 | 81 i += 1 |
| 54 if i == len(centroids): | 82 if i == len(centroids): |
| 55 centroids.append(Centroid(instance)) | 83 centroids.append(Centroid(instance)) |
| 56 else: | 84 else: |
| 57 centroids[i].add(instance) | 85 centroids[i].add(instance) |
| 58 | 86 |
| 59 return centroids | 87 return centroids |
| 88 | |
| 89 # TODO recompute centroids for each cluster: instance that minimizes some measure to all other elements | |
| 60 | 90 |
| 61 def spectralClustering(similarityMatrix, k, iter=20): | 91 def spectralClustering(similarityMatrix, k, iter=20): |
| 62 '''Spectral Clustering algorithm''' | 92 '''Spectral Clustering algorithm''' |
| 63 n = len(similarityMatrix) | 93 n = len(similarityMatrix) |
| 64 # create Laplacian matrix | 94 # create Laplacian matrix |
| 75 from scipy.cluster.vq import kmeans, whiten, vq | 105 from scipy.cluster.vq import kmeans, whiten, vq |
| 76 features = whiten(features) | 106 features = whiten(features) |
| 77 centroids,distortion = kmeans(features,k, iter) | 107 centroids,distortion = kmeans(features,k, iter) |
| 78 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) | 108 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) |
| 79 return code,sigma | 109 return code,sigma |
| 110 | |
| 111 def motionPatterLearning(objects, maxDistance): | |
| 112 ''' | |
| 113 Option to use only the (n?) longest features per object instead of all for speed up | |
| 114 TODO''' | |
| 115 pass | |
| 116 | |
| 117 def prototypeCluster(): | |
| 118 ''' | |
| 119 TODO''' | |
| 120 pass |
