comparison python/utils.py @ 708:a37c565f4b68

merged dev
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Wed, 22 Jul 2015 14:17:44 -0400
parents 8d99a9e16644
children c35e4a4b199d
comparison
equal deleted inserted replaced
707:7efa36b9bcfd 708:a37c565f4b68
4 4
5 import matplotlib.pyplot as plt 5 import matplotlib.pyplot as plt
6 from datetime import time, datetime 6 from datetime import time, datetime
7 from math import sqrt, ceil, floor 7 from math import sqrt, ceil, floor
8 from scipy.stats import kruskal, shapiro 8 from scipy.stats import kruskal, shapiro
9 from scipy.spatial import distance
9 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma 10 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma
10 11
11 12
12 datetimeFormat = "%Y-%m-%d %H:%M:%S" 13 datetimeFormat = "%Y-%m-%d %H:%M:%S"
13 14
23 return result 24 return result
24 25
25 ######################### 26 #########################
26 # Simple statistics 27 # Simple statistics
27 ######################### 28 #########################
29
def logNormalMeanVar(loc, scale):
    '''Returns the mean and variance of a log-normal distribution

    loc and scale are respectively the mean and standard deviation
    of the normal in the log-normal distribution
    https://en.wikipedia.org/wiki/Log-normal_distribution'''
    scaleSquared = scale**2
    # float division so that integer arguments are not truncated
    lnMean = exp(loc+scaleSquared/2.)
    # the first factor depends on the scale only: (exp(scale^2)-1)*exp(2*loc+scale^2)
    lnVar = (exp(scaleSquared)-1)*exp(2*loc+scaleSquared)
    return lnMean, lnVar
28 36
29 def sampleSize(stdev, tolerance, percentConfidence, printLatex = False): 37 def sampleSize(stdev, tolerance, percentConfidence, printLatex = False):
30 from scipy.stats.distributions import norm 38 from scipy.stats.distributions import norm
31 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200. 39 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200.
32 if printLatex: 40 if printLatex:
230 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3''' 238 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3'''
231 tens = 10**nDecimals 239 tens = 10**nDecimals
232 return ceil(v*tens)/tens 240 return ceil(v*tens)/tens
233 241
def inBetween(bound1, bound2, x):
    '''Indicates whether x lies between bound1 and bound2 (bounds included)

    useful if one does not know the order of bound1/bound2'''
    if bound1 <= bound2:
        return bound1 <= x <= bound2
    # bounds given in decreasing order
    return bound2 <= x <= bound1
236 245
def pointDistanceL2(x1,y1,x2,y2):
    '''Returns the distance between the points (x1,y1) and (x2,y2)
    (L2 norm, ie Euclidean distance)'''
    deltaX = x2-x1
    deltaY = y2-y1
    return sqrt(deltaX**2+deltaY**2)
617 # sequence section 626 # sequence section
618 ######################### 627 #########################
619 628
620 class LCSS(object): 629 class LCSS(object):
621 '''Class that keeps the LCSS parameters 630 '''Class that keeps the LCSS parameters
622 and puts together the various computations''' 631 and puts together the various computations
623 def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min): 632
624 self.similarityFunc = similarityFunc 633 the methods with names starting with _ are not to be shadowed
625 self.aligned = aligned 634 in child classes, who will shadow the other methods,
626 self.delta = delta 635 ie compute and computeXX methods'''
627 self.lengthFunc = lengthFunc 636 def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min):
628 self.subSequenceIndices = [(0,0)] 637 '''One should provide either a similarity function
638 that indicates (return bool) whether elements in the compares lists are similar
639
640 eg distance(p1, p2) < epsilon
641
642 or a type of metric usable in scipy.spatial.distance.cdist with an epsilon'''
643 if similarityFunc is None and metric is None:
644 print("No way to compute LCSS, similarityFunc and metric are None. Exiting")
645 import sys
646 sys.exit()
647 elif metric is not None and epsilon is None:
648 print("Please provide a value for epsilon if using a cdist metric. Exiting")
649 import sys
650 sys.exit()
651 else:
652 self.similarityFunc = similarityFunc
653 self.metric = metric
654 self.epsilon = epsilon
655 self.aligned = aligned
656 self.delta = delta
657 self.lengthFunc = lengthFunc
658 self.subSequenceIndices = [(0,0)]
629 659
def similarities(self, l1, l2, jshift=0):
    '''Fills self.similarityTable, the (len(l1)+1) x (len(l2)+1) table
    of the longest common subsequence lengths between the prefixes of l1 and l2

    Elements are compared using self.similarityFunc if it is not None,
    otherwise using the distances computed by scipy.spatial.distance.cdist
    with self.metric, thresholded by self.epsilon

    Only the cells (i,j) such that |i-jshift-j| <= self.delta are computed,
    the other cells are left at 0'''
    n1 = len(l1)
    n2 = len(l2)
    # builtin int: the numpy int alias was only a synonym and no longer exists in recent numpy
    self.similarityTable = zeros((n1+1,n2+1), dtype = int)
    if n1 == 0 or n2 == 0:
        # nothing to compare (and cdist does not accept empty sequences)
        return

    # choice of the element comparison, done once so that the table is filled by a single loop
    if self.similarityFunc is not None:
        def similar(i, j):
            return self.similarityFunc(l1[i], l2[j])
    elif self.metric is not None:
        similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon
        def similar(i, j):
            return similarElements[i, j]
    else:
        # no way to compare the elements: the table is left at 0
        return

    table = self.similarityTable
    for i in range(1,n1+1):
        for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1):
            if similar(i-1, j-1):
                table[i,j] = table[i-1,j-1]+1
            else:
                table[i,j] = max(table[i-1,j], table[i,j-1])
679
640 680
641 def subSequence(self, i, j): 681 def subSequence(self, i, j):
642 '''Returns the subsequence of two sequences 682 '''Returns the subsequence of two sequences
643 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' 683 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem'''
644 if i == 0 or j == 0: 684 if i == 0 or j == 0:
650 else: 690 else:
651 return self.subSequence(i-1, j-1) + [(i-1,j-1)] 691 return self.subSequence(i-1, j-1) + [(i-1,j-1)]
652 692
653 def _compute(self, _l1, _l2, computeSubSequence = False): 693 def _compute(self, _l1, _l2, computeSubSequence = False):
654 '''returns the longest common subsequence similarity 694 '''returns the longest common subsequence similarity
655 based on the threshold on distance between two elements of lists l1, l2 695 l1 and l2 should be the right format
656 similarityFunc returns True or False whether the two points are considered similar 696 eg list of tuple points for cdist
697 or elements that can be compared using similarityFunc
657 698
658 if aligned, returns the best matching if using a finite delta by shifting the series alignments 699 if aligned, returns the best matching if using a finite delta by shifting the series alignments
659
660 eg distance(p1, p2) < epsilon
661 ''' 700 '''
662 if len(_l2) < len(_l1): # l1 is the shortest 701 if len(_l2) < len(_l1): # l1 is the shortest
663 l1 = _l2 702 l1 = _l2
664 l2 = _l1 703 l2 = _l1
665 revertIndices = True 704 revertIndices = True