Mercurial > hg > nsaunier > traffic-intelligence
diff python/utils.py @ 708:a37c565f4b68
merged dev
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Wed, 22 Jul 2015 14:17:44 -0400 |
| parents | 8d99a9e16644 |
| children | c35e4a4b199d |
line wrap: on
line diff
--- a/python/utils.py Wed Jul 22 14:17:19 2015 -0400 +++ b/python/utils.py Wed Jul 22 14:17:44 2015 -0400 @@ -6,6 +6,7 @@ from datetime import time, datetime from math import sqrt, ceil, floor from scipy.stats import kruskal, shapiro +from scipy.spatial import distance from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma @@ -26,6 +27,13 @@ # Simple statistics ######################### +def logNormalMeanVar(loc, scale): + '''location and scale are respectively the mean and standard deviation of the normal in the log-normal distribution + https://en.wikipedia.org/wiki/Log-normal_distribution''' + mean = exp(loc+(scale**2)/2) + var = (exp(loc**2)-1)*exp(2*loc+scale**2) + return mean, var + def sampleSize(stdev, tolerance, percentConfidence, printLatex = False): from scipy.stats.distributions import norm k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200. @@ -232,6 +240,7 @@ return ceil(v*tens)/tens def inBetween(bound1, bound2, x): + 'useful if one does not know the order of bound1/bound2' return bound1 <= x <= bound2 or bound2 <= x <= bound1 def pointDistanceL2(x1,y1,x2,y2): @@ -619,24 +628,55 @@ class LCSS(object): '''Class that keeps the LCSS parameters - and puts together the various computations''' - def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min): - self.similarityFunc = similarityFunc - self.aligned = aligned - self.delta = delta - self.lengthFunc = lengthFunc - self.subSequenceIndices = [(0,0)] + and puts together the various computations + + the methods with names starting with _ are not to be shadowed + in child classes, who will shadow the other methods, + ie compute and computeXX methods''' + def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min): + '''One should provide either a similarity function + that 
indicates (return bool) whether elements in the compared lists are similar + + eg distance(p1, p2) < epsilon + + or a type of metric usable in scipy.spatial.distance.cdist with an epsilon''' + if similarityFunc is None and metric is None: + print("No way to compute LCSS, similarityFunc and metric are None. Exiting") + import sys + sys.exit() + elif metric is not None and epsilon is None: + print("Please provide a value for epsilon if using a cdist metric. Exiting") + import sys + sys.exit() + else: + self.similarityFunc = similarityFunc + self.metric = metric + self.epsilon = epsilon + self.aligned = aligned + self.delta = delta + self.lengthFunc = lengthFunc + self.subSequenceIndices = [(0,0)] def similarities(self, l1, l2, jshift=0): n1 = len(l1) n2 = len(l2) self.similarityTable = zeros((n1+1,n2+1), dtype = npint) - for i in xrange(1,n1+1): - for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): - if self.similarityFunc(l1[i-1], l2[j-1]): - self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 - else: - self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) + if self.similarityFunc is not None: + for i in xrange(1,n1+1): + for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): + if self.similarityFunc(l1[i-1], l2[j-1]): + self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 + else: + self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) + elif self.metric is not None: + similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon + for i in xrange(1,n1+1): + for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): + if similarElements[i-1, j-1]: + self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 + else: + self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) + def subSequence(self, i, j): '''Returns the subsequence of two sequences @@ -652,12 +692,11 @@ def 
_compute(self, _l1, _l2, computeSubSequence = False): '''returns the longest common subsequence similarity - based on the threshold on distance between two elements of lists l1, l2 - similarityFunc returns True or False whether the two points are considered similar + l1 and l2 should be the right format + eg list of tuple points for cdist + or elements that can be compared using similarityFunc if aligned, returns the best matching if using a finite delta by shifting the series alignments - - eg distance(p1, p2) < epsilon ''' if len(_l2) < len(_l1): # l1 is the shortest l1 = _l2
