Mercurial > hg > nsaunier > traffic-intelligence
comparison python/utils.py @ 708:a37c565f4b68
merged dev
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Wed, 22 Jul 2015 14:17:44 -0400 |
| parents | 8d99a9e16644 |
| children | c35e4a4b199d |
comparison
equal
deleted
inserted
replaced
| 707:7efa36b9bcfd | 708:a37c565f4b68 |
|---|---|
| 4 | 4 |
| 5 import matplotlib.pyplot as plt | 5 import matplotlib.pyplot as plt |
| 6 from datetime import time, datetime | 6 from datetime import time, datetime |
| 7 from math import sqrt, ceil, floor | 7 from math import sqrt, ceil, floor |
| 8 from scipy.stats import kruskal, shapiro | 8 from scipy.stats import kruskal, shapiro |
| 9 from scipy.spatial import distance | |
| 9 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma | 10 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma |
| 10 | 11 |
| 11 | 12 |
| 12 datetimeFormat = "%Y-%m-%d %H:%M:%S" | 13 datetimeFormat = "%Y-%m-%d %H:%M:%S" |
| 13 | 14 |
| 23 return result | 24 return result |
| 24 | 25 |
| 25 ######################### | 26 ######################### |
| 26 # Simple statistics | 27 # Simple statistics |
| 27 ######################### | 28 ######################### |
| 29 | |
| 30 def logNormalMeanVar(loc, scale): | |
| 31 '''location and scale are respectively the mean and standard deviation of the normal in the log-normal distribution | |
| 32 https://en.wikipedia.org/wiki/Log-normal_distribution''' | |
| 33 mean = exp(loc+(scale**2)/2) | |
| 34 var = (exp(loc**2)-1)*exp(2*loc+scale**2) | |
| 35 return mean, var | |
| 28 | 36 |
| 29 def sampleSize(stdev, tolerance, percentConfidence, printLatex = False): | 37 def sampleSize(stdev, tolerance, percentConfidence, printLatex = False): |
| 30 from scipy.stats.distributions import norm | 38 from scipy.stats.distributions import norm |
| 31 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200. | 39 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200. |
| 32 if printLatex: | 40 if printLatex: |
| 230 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3''' | 238 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3''' |
| 231 tens = 10**nDecimals | 239 tens = 10**nDecimals |
| 232 return ceil(v*tens)/tens | 240 return ceil(v*tens)/tens |
| 233 | 241 |
| 234 def inBetween(bound1, bound2, x): | 242 def inBetween(bound1, bound2, x): |
| 243 'useful if one does not know the order of bound1/bound2' | |
| 235 return bound1 <= x <= bound2 or bound2 <= x <= bound1 | 244 return bound1 <= x <= bound2 or bound2 <= x <= bound1 |
| 236 | 245 |
| 237 def pointDistanceL2(x1,y1,x2,y2): | 246 def pointDistanceL2(x1,y1,x2,y2): |
| 238 ''' Compute point-to-point distance (L2 norm, ie Euclidean distance)''' | 247 ''' Compute point-to-point distance (L2 norm, ie Euclidean distance)''' |
| 239 return sqrt((x2-x1)**2+(y2-y1)**2) | 248 return sqrt((x2-x1)**2+(y2-y1)**2) |
| 617 # sequence section | 626 # sequence section |
| 618 ######################### | 627 ######################### |
| 619 | 628 |
| 620 class LCSS(object): | 629 class LCSS(object): |
| 621 '''Class that keeps the LCSS parameters | 630 '''Class that keeps the LCSS parameters |
| 622 and puts together the various computations''' | 631 and puts together the various computations |
| 623 def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min): | 632 |
| 624 self.similarityFunc = similarityFunc | 633 the methods with names starting with _ are not to be shadowed |
| 625 self.aligned = aligned | 634 in child classes, which will shadow the other methods, |
| 626 self.delta = delta | 635 ie compute and computeXX methods''' |
| 627 self.lengthFunc = lengthFunc | 636 def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min): |
| 628 self.subSequenceIndices = [(0,0)] | 637 '''One should provide either a similarity function |
| 638 that indicates (returns bool) whether elements in the compared lists are similar | |
| 639 | |
| 640 eg distance(p1, p2) < epsilon | |
| 641 | |
| 642 or a type of metric usable in scipy.spatial.distance.cdist with an epsilon''' | |
| 643 if similarityFunc is None and metric is None: | |
| 644 print("No way to compute LCSS, similarityFunc and metric are None. Exiting") | |
| 645 import sys | |
| 646 sys.exit() | |
| 647 elif metric is not None and epsilon is None: | |
| 648 print("Please provide a value for epsilon if using a cdist metric. Exiting") | |
| 649 import sys | |
| 650 sys.exit() | |
| 651 else: | |
| 652 self.similarityFunc = similarityFunc | |
| 653 self.metric = metric | |
| 654 self.epsilon = epsilon | |
| 655 self.aligned = aligned | |
| 656 self.delta = delta | |
| 657 self.lengthFunc = lengthFunc | |
| 658 self.subSequenceIndices = [(0,0)] | |
| 629 | 659 |
| 630 def similarities(self, l1, l2, jshift=0): | 660 def similarities(self, l1, l2, jshift=0): |
| 631 n1 = len(l1) | 661 n1 = len(l1) |
| 632 n2 = len(l2) | 662 n2 = len(l2) |
| 633 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) | 663 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) |
| 634 for i in xrange(1,n1+1): | 664 if self.similarityFunc is not None: |
| 635 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): | 665 for i in xrange(1,n1+1): |
| 636 if self.similarityFunc(l1[i-1], l2[j-1]): | 666 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): |
| 637 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | 667 if self.similarityFunc(l1[i-1], l2[j-1]): |
| 638 else: | 668 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 |
| 639 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | 669 else: |
| 670 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | |
| 671 elif self.metric is not None: | |
| 672 similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon | |
| 673 for i in xrange(1,n1+1): | |
| 674 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): | |
| 675 if similarElements[i-1, j-1]: | |
| 676 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | |
| 677 else: | |
| 678 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | |
| 679 | |
| 640 | 680 |
| 641 def subSequence(self, i, j): | 681 def subSequence(self, i, j): |
| 642 '''Returns the subsequence of two sequences | 682 '''Returns the subsequence of two sequences |
| 643 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' | 683 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' |
| 644 if i == 0 or j == 0: | 684 if i == 0 or j == 0: |
| 650 else: | 690 else: |
| 651 return self.subSequence(i-1, j-1) + [(i-1,j-1)] | 691 return self.subSequence(i-1, j-1) + [(i-1,j-1)] |
| 652 | 692 |
| 653 def _compute(self, _l1, _l2, computeSubSequence = False): | 693 def _compute(self, _l1, _l2, computeSubSequence = False): |
| 654 '''returns the longest common subsequence similarity | 694 '''returns the longest common subsequence similarity |
| 655 based on the threshold on distance between two elements of lists l1, l2 | 695 l1 and l2 should be in the right format |
| 656 similarityFunc returns True or False whether the two points are considered similar | 696 eg list of tuple points for cdist |
| 697 or elements that can be compared using similarityFunc | |
| 657 | 698 |
| 658 if aligned, returns the best matching if using a finite delta by shifting the series alignments | 699 if aligned, returns the best matching if using a finite delta by shifting the series alignments |
| 659 | |
| 660 eg distance(p1, p2) < epsilon | |
| 661 ''' | 700 ''' |
| 662 if len(_l2) < len(_l1): # l1 is the shortest | 701 if len(_l2) < len(_l1): # l1 is the shortest |
| 663 l1 = _l2 | 702 l1 = _l2 |
| 664 l2 = _l1 | 703 l2 = _l1 |
| 665 revertIndices = True | 704 revertIndices = True |
