diff python/utils.py @ 708:a37c565f4b68

merged dev
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 22 Jul 2015 14:17:44 -0400
parents 8d99a9e16644
children c35e4a4b199d
line wrap: on
line diff
--- a/python/utils.py	Wed Jul 22 14:17:19 2015 -0400
+++ b/python/utils.py	Wed Jul 22 14:17:44 2015 -0400
@@ -6,6 +6,7 @@
 from datetime import time, datetime
 from math import sqrt, ceil, floor
 from scipy.stats import kruskal, shapiro
+from scipy.spatial import distance
 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve,  dtype, isnan, NaN, mean, ma
 
 
@@ -26,6 +27,13 @@
 # Simple statistics
 #########################
 
+def logNormalMeanVar(loc, scale):
+    '''location and scale are respectively the mean and standard deviation of the normal in the log-normal distribution
+    https://en.wikipedia.org/wiki/Log-normal_distribution'''
+    mean = exp(loc+(scale**2)/2)
+    var = (exp(loc**2)-1)*exp(2*loc+scale**2)
+    return mean, var
+
 def sampleSize(stdev, tolerance, percentConfidence, printLatex = False):
     from scipy.stats.distributions import norm
     k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200.
@@ -232,6 +240,7 @@
     return ceil(v*tens)/tens
 
 def inBetween(bound1, bound2, x):
+    'useful if one does not know the order of bound1/bound2'
     return bound1 <= x <= bound2 or bound2 <= x <= bound1
 
 def pointDistanceL2(x1,y1,x2,y2):
@@ -619,24 +628,55 @@
 
 class LCSS(object):
     '''Class that keeps the LCSS parameters
-    and puts together the various computations'''
-    def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min):
-        self.similarityFunc = similarityFunc
-        self.aligned = aligned
-        self.delta = delta
-        self.lengthFunc = lengthFunc
-        self.subSequenceIndices = [(0,0)]
+    and puts together the various computations
+
+    the methods with names starting with _ are not to be shadowed
+    in child classes, who will shadow the other methods, 
+    ie compute and computeXX methods'''
+    def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min):
+        '''One should provide either a similarity function
+        that indicates (return bool) whether elements in the compares lists are similar
+
+        eg distance(p1, p2) < epsilon
+        
+        or a type of metric usable in scipy.spatial.distance.cdist with an epsilon'''
+        if similarityFunc is None and metric is None:
+            print("No way to compute LCSS, similarityFunc and metric are None. Exiting")
+            import sys
+            sys.exit()
+        elif metric is not None and epsilon is None:
+            print("Please provide a value for epsilon if using a cdist metric. Exiting")
+            import sys
+            sys.exit()
+        else:
+            self.similarityFunc = similarityFunc
+            self.metric = metric
+            self.epsilon = epsilon
+            self.aligned = aligned
+            self.delta = delta
+            self.lengthFunc = lengthFunc
+            self.subSequenceIndices = [(0,0)]
 
     def similarities(self, l1, l2, jshift=0):
         n1 = len(l1)
         n2 = len(l2)
         self.similarityTable = zeros((n1+1,n2+1), dtype = npint)
-        for i in xrange(1,n1+1):
-            for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
-                if self.similarityFunc(l1[i-1], l2[j-1]):
-                    self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
-                else:
-                    self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+        if self.similarityFunc is not None:
+            for i in xrange(1,n1+1):
+                for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
+                    if self.similarityFunc(l1[i-1], l2[j-1]):
+                        self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
+                    else:
+                        self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+        elif self.metric is not None:
+            similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon
+            for i in xrange(1,n1+1):
+                for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
+                    if similarElements[i-1, j-1]:
+                        self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1
+                    else:
+                        self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1])
+            
 
     def subSequence(self, i, j):
         '''Returns the subsequence of two sequences
@@ -652,12 +692,11 @@
 
     def _compute(self, _l1, _l2, computeSubSequence = False):
         '''returns the longest common subsequence similarity
-        based on the threshold on distance between two elements of lists l1, l2
-        similarityFunc returns True or False whether the two points are considered similar
+        l1 and l2 should be the right format
+        eg list of tuple points for cdist 
+        or elements that can be compare using similarityFunc
 
         if aligned, returns the best matching if using a finite delta by shifting the series alignments
-
-        eg distance(p1, p2) < epsilon
         '''
         if len(_l2) < len(_l1): # l1 is the shortest
             l1 = _l2