# HG changeset patch # User Nicolas Saunier # Date 1374011107 14400 # Node ID d0b86ed50f32ce5e6565347d499ecf9edb84f9be # Parent 349eb1e09f450701e07d2d0c973fd5b10963003a work in progress on LCSS diff -r 349eb1e09f45 -r d0b86ed50f32 python/utils.py --- a/python/utils.py Tue Jul 16 17:00:17 2013 -0400 +++ b/python/utils.py Tue Jul 16 17:45:07 2013 -0400 @@ -218,14 +218,14 @@ self.lengthFunc = lengthFunc self.alignmentShift = 0 - def similarities(self, l1, l2, shift=0): + def similarities(self, l1, l2, jshift=0): from numpy import zeros, int as npint n1 = len(l1) n2 = len(l2) self.similarityTable = zeros((n1+1,n2+1), dtype = npint) for i in xrange(1,n1+1): - for j in xrange(max(1,i-shift-self.delta),min(n2+1,i-shift+self.delta+1)): - #print max(1,i-shift-self.delta),min(n2+1,i-shift+self.delta+1) + for j in xrange(max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1)): + print max(1,i-jshift-self.delta),min(n2+1,i-jshift+self.delta+1) if self.similarityFunc(l1[i-1], l2[j-1]): self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 else: @@ -243,22 +243,29 @@ else: return self.subSequence(i-1, j-1) + [(i-1,j-1)] - def computeLCSS(self, l1, l2, computeSubSequence = False): + # def computeLCSS(self, l1, l2, computeSubSequence = False): + # '''returns the longest common subsequence similarity + # based on the threshold on distance between two elements of lists l1, l2 + # similarityFunc returns True or False whether the two points are considered similar + + # eg distance(p1, p2) < epsilon + # ''' + # #from numpy import argmax + # self.similarities(l1, l2) + # #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] + # if computeSubSequence: + # self.subSequenceIndices = self.subSequence(len(l1), len(l2)) + # return self.similarityTable.max() + + def _compute(self, _l1, _l2, computeSubSequence = False): '''returns the longest common subsequence similarity based on the threshold on distance between two elements of lists l1, l2 similarityFunc returns True or False whether the two points are considered similar + if aligned, returns the best matching if using a finite delta by shiftinig the series alignments + eg distance(p1, p2) < epsilon ''' - #from numpy import argmax - self.similarities(l1, l2) - #self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] - if computeSubSequence: - self.subSequenceIndices = self.subSequence(len(l1), len(l2)) - return self.similarityTable.max() - - def _compute(self, _l1, _l2, computeSubSequence = False): - '''returns the best matching if using a finite delta by shiftinig the series alignments''' if len(_l2) < len(_l1): # l1 is the shortest l1 = _l2 l2 = _l1 @@ -271,39 +278,31 @@ n2 = len(l2) if self.aligned: - # for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1)): # i is the alignment of the end of l1 in l2 - # print l1[min(-i-1,n1):] # min(n1+n2-i,n1) - # print l2[max(0,i-n1):] - # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) lcssValues = {} similarityTables = {} - #for i in xrange(min(self.delta,n1), max(n1+n2-self.delta, n2+1)): - for i in xrange(-max(n1,n2)-self.delta, +max(n1,n2)+self.delta): + for i in xrange(-n1-self.delta-1, +max(n1,n2)+self.delta): # larger interval #print l1[min(-i-1,n1):] # min(n1+n2-i,n1) #print l2[max(0,i-n1):] #lcssValues[i] = self.computeLCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):]) #print i, lcssValues[i] - lcssValues[i] = self.similarities(l1, l2, i) + self.similarities(l1, l2, i) + lcssValues[i] = self.similarityTable.max() similarityTables[i] = self.similarityTable - #print i print self.similarityTable - self.alignmentShift = argMaxDict(lcssValues) + self.alignmentShift = argMaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance self.similarityTable = similarityTables[self.alignmentShift] - # do the subsequence computation here, once similarityTable is set - #self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) - #lcss = lcssValues[imax] else: self.alignmentShift = 0 self.similarities(l1, l2) - self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta)+1] + + self.similarityTable = self.similarityTable[:, :min(len(l2), len(l1)+self.delta-self.alignmentShift)+1] + if computeSubSequence: self.subSequenceIndices = self.subSequence(self.similarityTable.shape[0]-1, self.similarityTable.shape[1]-1) if revertIndices: self.subSequenceIndices = [(j+self.alignmentShift,i) for i,j in self.subSequenceIndices] - #self.alignmentShift = imax-n1 else: self.subSequenceIndices = [(i+self.alignmentShift,j) for i,j in self.subSequenceIndices] - #self.alignmentShift = n1-imax return self.similarityTable[-1,-1] def compute(self, l1, l2, computeSubSequence = False):