Mercurial > hg > nsaunier > traffic-intelligence
comparison python/utils.py @ 689:9990ef119bce dev
added version of LCSS with cdist computations
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Mon, 29 Jun 2015 08:35:27 -0400 |
| parents | de278c5e65f6 |
| children | 8d99a9e16644 |
comparison
equal
deleted
inserted
replaced
| 688:f2b52355a286 | 689:9990ef119bce |
|---|---|
| 4 | 4 |
| 5 import matplotlib.pyplot as plt | 5 import matplotlib.pyplot as plt |
| 6 from datetime import time, datetime | 6 from datetime import time, datetime |
| 7 from math import sqrt, ceil, floor | 7 from math import sqrt, ceil, floor |
| 8 from scipy.stats import kruskal, shapiro | 8 from scipy.stats import kruskal, shapiro |
| 9 from scipy.spatial import distance | |
| 9 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma | 10 from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, median, isnan, ones, convolve, dtype, isnan, NaN, mean, ma |
| 10 | 11 |
| 11 | 12 |
| 12 datetimeFormat = "%Y-%m-%d %H:%M:%S" | 13 datetimeFormat = "%Y-%m-%d %H:%M:%S" |
| 13 | 14 |
| 629 and puts together the various computations | 630 and puts together the various computations |
| 630 | 631 |
| 631 the methods with names starting with _ are not to be shadowed | 632 the methods with names starting with _ are not to be shadowed |
| 632 in child classes, who will shadow the other methods, | 633 in child classes, who will shadow the other methods, |
| 633 ie compute and computeXX methods''' | 634 ie compute and computeXX methods''' |
| 634 def __init__(self, similarityFunc, delta = float('inf'), aligned = False, lengthFunc = min): | 635 def __init__(self, similarityFunc = None, metric = None, epsilon = None, delta = float('inf'), aligned = False, lengthFunc = min): |
| 635 self.similarityFunc = similarityFunc | 636 '''One should provide either a similarity function |
| 636 self.aligned = aligned | 637 that indicates (return bool) whether elements in the compares lists are similar |
| 637 self.delta = delta | 638 |
| 638 self.lengthFunc = lengthFunc | 639 eg distance(p1, p2) < epsilon |
| 639 self.subSequenceIndices = [(0,0)] | 640 |
| 641 or a type of metric usable in scipy.spatial.distance.cdist with an epsilon''' | |
| 642 if similarityFunc is None and metric is None: | |
| 643 print("No way to compute LCSS, similarityFunc and metric are None. Exiting") | |
| 644 import sys | |
| 645 sys.exit() | |
| 646 elif metric is not None and epsilon is None: | |
| 647 print("Please provide a value for epsilon if using a cdist metric. Exiting") | |
| 648 import sys | |
| 649 sys.exit() | |
| 650 else: | |
| 651 self.similarityFunc = similarityFunc | |
| 652 self.metric = metric | |
| 653 self.epsilon = epsilon | |
| 654 self.aligned = aligned | |
| 655 self.delta = delta | |
| 656 self.lengthFunc = lengthFunc | |
| 657 self.subSequenceIndices = [(0,0)] | |
| 640 | 658 |
| 641 def similarities(self, l1, l2, jshift=0): | 659 def similarities(self, l1, l2, jshift=0): |
| 642 n1 = len(l1) | 660 n1 = len(l1) |
| 643 n2 = len(l2) | 661 n2 = len(l2) |
| 644 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) | 662 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) |
| 645 for i in xrange(1,n1+1): | 663 if self.similarityFunc is not None: |
| 646 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): | 664 for i in xrange(1,n1+1): |
| 647 if self.similarityFunc(l1[i-1], l2[j-1]): | 665 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): |
| 648 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | 666 if self.similarityFunc(l1[i-1], l2[j-1]): |
| 649 else: | 667 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 |
| 650 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | 668 else: |
| 669 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | |
| 670 elif self.metric is not None: | |
| 671 similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon | |
| 672 for i in xrange(1,n1+1): | |
| 673 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): | |
| 674 if similarElements[i-1, j-1]: | |
| 675 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | |
| 676 else: | |
| 677 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | |
| 678 | |
| 651 | 679 |
| 652 def subSequence(self, i, j): | 680 def subSequence(self, i, j): |
| 653 '''Returns the subsequence of two sequences | 681 '''Returns the subsequence of two sequences |
| 654 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' | 682 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' |
| 655 if i == 0 or j == 0: | 683 if i == 0 or j == 0: |
| 661 else: | 689 else: |
| 662 return self.subSequence(i-1, j-1) + [(i-1,j-1)] | 690 return self.subSequence(i-1, j-1) + [(i-1,j-1)] |
| 663 | 691 |
| 664 def _compute(self, _l1, _l2, computeSubSequence = False): | 692 def _compute(self, _l1, _l2, computeSubSequence = False): |
| 665 '''returns the longest common subsequence similarity | 693 '''returns the longest common subsequence similarity |
| 666 based on the threshold on distance between two elements of lists l1, l2 | 694 l1 and l2 should be the right format |
| 667 similarityFunc returns True or False whether the two points are considered similar | 695 eg list of tuple points for cdist |
| 696 or elements that can be compare using similarityFunc | |
| 668 | 697 |
| 669 if aligned, returns the best matching if using a finite delta by shifting the series alignments | 698 if aligned, returns the best matching if using a finite delta by shifting the series alignments |
| 670 | |
| 671 eg distance(p1, p2) < epsilon | |
| 672 ''' | 699 ''' |
| 673 if len(_l2) < len(_l1): # l1 is the shortest | 700 if len(_l2) < len(_l1): # l1 is the shortest |
| 674 l1 = _l2 | 701 l1 = _l2 |
| 675 l2 = _l1 | 702 l2 = _l1 |
| 676 revertIndices = True | 703 revertIndices = True |
