nsaunier/traffic-intelligence: python/utils.py comparison

comparison python/utils.py @ 708:a37c565f4b68

merged dev

author	Nicolas Saunier <nicolas.saunier@polymtl.ca>
date	Wed, 22 Jul 2015 14:17:44 -0400
parents	8d99a9e16644
children	c35e4a4b199d

comparison

equal deleted inserted replaced

-:7efa36b9bcfd
+:a37c565f4b68
 import matplotlib.pyplot as plt
 from datetime import time, datetime
 from math import sqrt, ceil, floor
 from scipy.stats import kruskal, shapiro
+from scipy.spatial import distance
 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve,  dtype, isnan, NaN, mean, ma
 datetimeFormat = "%Y-%m-%d %H:%M:%S"
 return result
 #########################
 # Simple statistics
 #########################
def logNormalMeanVar(loc, scale):
    '''Returns the mean and variance of a log-normal distribution

    loc and scale are respectively the mean and standard deviation
    of the underlying normal distribution (ie of the logarithm of the variable)
    https://en.wikipedia.org/wiki/Log-normal_distribution

    returns the tuple (mean, variance)'''
    scaleSquared = scale**2
    # divide by a float so that integer arguments are not truncated (Python 2 division)
    lnMean = exp(loc+scaleSquared/2.)
    # the variance is (exp(sigma^2)-1)*exp(2*mu+sigma^2):
    # the first factor depends on the scale, not on the location
    lnVar = (exp(scaleSquared)-1)*exp(2*loc+scaleSquared)
    return lnMean, lnVar
 def sampleSize(stdev, tolerance, percentConfidence, printLatex = False):
 from scipy.stats.distributions import norm
 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200.
 if printLatex:
 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3'''
 tens = 10**nDecimals
 return ceil(v*tens)/tens
 def inBetween(bound1, bound2, x):
     '''Tells whether x lies between bound1 and bound2 (bounds included)

     the bounds can be given in any order'''
     if bound1 <= x <= bound2:
         return True
     # otherwise the bounds may have been provided in decreasing order
     return bound2 <= x <= bound1
 def pointDistanceL2(x1,y1,x2,y2):
     '''Returns the Euclidean (L2 norm) distance
     between the points (x1, y1) and (x2, y2)'''
     deltaX = x2-x1
     deltaY = y2-y1
     return sqrt(deltaX**2+deltaY**2)
 # sequence section
 #########################
 class LCSS(object):
 '''Class that keeps the LCSS parameters
-and puts together the various computations'''
+and puts together the various computations
-def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min):
-self.similarityFunc = similarityFunc
+the methods with names starting with _ are not to be shadowed
-self.aligned = aligned
+in child classes, who will shadow the other methods,
-self.delta = delta
+ie compute and computeXX methods'''
-self.lengthFunc = lengthFunc
+def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min):
-self.subSequenceIndices = [(0,0)]
+'''One should provide either a similarity function
+that indicates (returns bool) whether elements in the compared lists are similar
+eg distance(p1, p2) < epsilon
+or a type of metric usable in scipy.spatial.distance.cdist with an epsilon'''
+if similarityFunc is None and metric is None:
+print("No way to compute LCSS, similarityFunc and metric are None. Exiting")
+import sys
+sys.exit()
+elif metric is not None and epsilon is None:
+print("Please provide a value for epsilon if using a cdist metric. Exiting")
+import sys
+sys.exit()
+else:
+self.similarityFunc = similarityFunc
+self.metric = metric
+self.epsilon = epsilon
+self.aligned = aligned
+self.delta = delta
+self.lengthFunc = lengthFunc
+self.subSequenceIndices = [(0,0)]
 def similarities(self, l1, l2, jshift=0):
 n1 = len(l1)
 n2 = len(l2)
 self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
-for i in xrange(1,n1+1):
+if self.similarityFunc is not None:
-for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
+for i in xrange(1,n1+1):
-if self.similarityFunc(l1[i-1], l2[j-1]):
+for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
-self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
+if self.similarityFunc(l1[i-1], l2[j-1]):
-else:
+self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
-self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+else:
+self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+elif self.metric is not None:
+similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon
+for i in xrange(1,n1+1):
+for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
+if similarElements[i-1, j-1]:
+self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
+else:
+self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
 def subSequence(self, i, j):
 '''Returns the subsequence of two sequences
 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
 if i == 0 or j == 0:
 else:
 return self.subSequence(i-1, j-1) + [(i-1,j-1)]
 def _compute(self, _l1, _l2, computeSubSequence = False):
 '''returns the longest common subsequence similarity
-based on the threshold on distance between two elements of lists l1, l2
+l1 and l2 should be the right format
-similarityFunc returns True or False whether the two points are considered similar
+eg list of tuple points for cdist
+or elements that can be compared using similarityFunc
 if aligned, returns the best matching if using a finite delta by shifting the series alignments
-eg distance(p1, p2) < epsilon
 '''
 if len(_l2) < len(_l1): # l1 is the shortest
 l1 = _l2
 l2 = _l1
 revertIndices = True

Mercurial > hg > nsaunier > traffic-intelligence

comparison python/utils.py @ 708:a37c565f4b68