comparison python/ml.py @ 746:e7ff0f60fef8

merged new developments (indicator and trajectory clustering)
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Thu, 10 Sep 2015 15:52:45 -0400
parents 2472b4d59aea
children 1f2b2d1f4fbf
comparison
equal deleted inserted replaced
727:c6d4ea05a2d0 746:e7ff0f60fef8
109 features = whiten(features) 109 features = whiten(features)
110 centroids,distortion = kmeans(features,k, iter) 110 centroids,distortion = kmeans(features,k, iter)
111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster) 111 code,distance = vq(features,centroids) # code starting from 0 (represent first cluster) to k-1 (last cluster)
112 return code,sigma 112 return code,sigma
113 113
114 def motionPatterLearning(objects, maxDistance): 114 def prototypeCluster(instances, similarities, minSimilarity, similarityFunc = None, minClusterSize = None, randomInitialization = False):
115 ''' 115 '''Finds exemplar (prototype) instance that represent each cluster
116 Option to use only the (n?) longest features per object instead of all for speed up 116 Returns the prototype indices (in the instances list) and the cluster label of each instance
117 TODO'''
118 pass
119 117
120 def prototypeCluster(): 118 the elements in the instances list must have a length (method __len__), or one can use the random initialization
121 ''' 119 the positions in the instances list corresponds to the similarities
122 TODO''' 120 if similarityFunc is provided, the similarities are calculated as needed (this is faster) if not in similarities (negative if not computed)
123 pass 121 similarities must still be allocated with the right size
122
123 if an instance is different enough (<minSimilarity),
124 it will become a new prototype.
125 Non-prototype instances will be assigned to an existing prototype
126 if minClusterSize is not None, the clusters will be refined by removing iteratively the smallest clusters
127 and reassigning all elements in the cluster until no cluster is smaller than minClusterSize'''
128
129 # sort instances based on length
130 indices = range(len(instances))
131 if randomInitialization:
132 indices = np.random.permutation(indices)
133 else:
134 def compare(i, j):
135 if len(instances[i]) > len(instances[j]):
136 return -1
137 elif len(instances[i]) == len(instances[j]):
138 return 0
139 else:
140 return 1
141 indices.sort(compare)
142 # go through all instances
143 prototypeIndices = [indices[0]]
144 for i in indices[1:]:
145 if similarityFunc is not None:
146 for j in prototypeIndices:
147 if similarities[i][j] < 0:
148 similarities[i][j] = similarityFunc(instances[i], instances[j])
149 similarities[j][i] = similarities[i][j]
150 if similarities[i][prototypeIndices].max() < minSimilarity:
151 prototypeIndices.append(i)
152
153 # assignment
154 indices = [i for i in range(similarities.shape[0]) if i not in prototypeIndices]
155 assign = True
156 while assign:
157 labels = [-1]*similarities.shape[0]
158 for i in prototypeIndices:
159 labels[i] = i
160 for i in indices:
161 if similarityFunc is not None:
162 for j in prototypeIndices:
163 if similarities[i][j] < 0:
164 similarities[i][j] = similarityFunc(instances[i], instances[j])
165 similarities[j][i] = similarities[i][j]
166 prototypeIdx = similarities[i][prototypeIndices].argmax()
167 if similarities[i][prototypeIndices[prototypeIdx]] >= minSimilarity:
168 labels[i] = prototypeIndices[prototypeIdx]
169 else:
170 labels[i] = -1 # outlier
171 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
172 smallestClusterIndex = min(clusterSizes, key = clusterSizes.get)
173 assign = (clusterSizes[smallestClusterIndex] < minClusterSize)
174 if assign:
175 prototypeIndices.remove(smallestClusterIndex)
176 indices.append(smallestClusterIndex)
177
178 return prototypeIndices, labels
179
180 def computeClusterSizes(labels, prototypeIndices, outlierIndex = -1):
181 clusterSizes = {i: sum(np.array(labels) == i) for i in prototypeIndices}
182 clusterSizes['outlier'] = sum(np.array(labels) == -1)
183 return clusterSizes