Mercurial > hg > nsaunier > traffic-intelligence
comparison python/utils.py @ 368:2db4e76599a1
implemented subsequence extraction and rearranged arguments
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Mon, 15 Jul 2013 15:08:53 -0400 |
| parents | 90bdabc06e9f |
| children | 027e254f0b53 |
comparison
equal
deleted
inserted
replaced
| 367:d44eba0db517 | 368:2db4e76599a1 |
|---|---|
| 139 | 139 |
| 140 ######################### | 140 ######################### |
| 141 # sequence section | 141 # sequence section |
| 142 ######################### | 142 ######################### |
| 143 | 143 |
| 144 def LCSS(l1, l2, similarityFunc, delta = float('inf')): | 144 def sequenceSimilarities(l1, l2, similarityFunc, delta = float('inf')): |
| 145 from numpy import zeros, int as npint | |
| 146 n1 = len(l1) | |
| 147 n2 = len(l2) | |
| 148 similarities = zeros((n1+1,n2+1), dtype = npint) | |
| 149 for i in xrange(1,n1+1): | |
| 150 for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)): | |
| 151 if similarityFunc(l1[i-1], l2[j-1]): | |
| 152 similarities[i,j] = similarities[i-1,j-1]+1 | |
| 153 else: | |
| 154 similarities[i,j] = max(similarities[i-1,j], similarities[i,j-1]) | |
| 155 return similarities | |
| 156 | |
| 157 def subSequence(similarities, i, j): | |
| 158 '''Returns the subsequence of two sequences | |
| 159 http://en.wikipedia.org/wiki/Longest_common_subsequence_problem''' | |
| 160 if i == 0 or j == 0: | |
| 161 return [] | |
| 162 elif similarities[i][j] == similarities[i][j-1]: | |
| 163 return subSequence(similarities, i, j-1) | |
| 164 elif similarities[i][j] == similarities[i-1][j]: | |
| 165 return subSequence(similarities, i-1, j) | |
| 166 else: | |
| 167 return subSequence(similarities, i-1, j-1) + [(i-1,j-1)] | |
| 168 | |
| 169 def LCSS(l1, l2, similarityFunc, delta = float('inf'), returnSubSequence = False): | |
| 145 '''returns the longest common subsequence similarity | 170 '''returns the longest common subsequence similarity |
| 146 based on the threshold on distance between two elements of lists l1, l2 | 171 based on the threshold on distance between two elements of lists l1, l2 |
| 147 similarityFunc returns True or False whether the two points are considered similar | 172 similarityFunc returns True or False whether the two points are considered similar |
| 148 | 173 |
| 149 eg distance(p1, p2) < epsilon | 174 eg distance(p1, p2) < epsilon |
| 150 ''' | 175 ''' |
| 151 from numpy import zeros, int as npint | 176 from numpy import argmax |
| 152 n1 = len(l1) | 177 similarities = sequenceSimilarities(l1, l2, similarityFunc, delta) |
| 153 n2 = len(l2) | 178 imax = argmax(similarities[:,-1]) |
| 154 similarity = zeros((n1+1,n2+1), dtype = npint) | 179 jmax = argmax(similarities[-1,:]) |
| 155 for i in xrange(1,n1+1): | 180 if returnSubSequence: |
| 156 for j in xrange(max(1,i-delta),min(n2+1,i+delta+1)): | 181 if similarities[imax, -1] > similarities[-1, jmax]: |
| 157 if similarityFunc(l1[i-1], l2[j-1]): | 182 lcss = similarities[imax, -1] |
| 158 similarity[i][j] = similarity[i-1][j-1]+1 | 183 subseq = subSequence(similarities, imax, len(l2)) |
| 159 else: | 184 else: |
| 160 similarity[i][j] = max(similarity[i-1][j], similarity[i][j-1]) | 185 lcss = similarities[-1, jmax] |
| 161 return max(max(similarity[:,-1]), max(similarity[-1,:])) | 186 subseq = subSequence(similarities, len(l1), jmax) |
| 162 | 187 return lcss, subseq |
| 163 def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthMethod = min): | 188 else: |
| 189 return max(similarities[imax, -1], similarities[-1, jmax]) | |
| 190 | |
| 191 def normalizedLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min): | |
| 164 ''' compute the normalized LCSS | 192 ''' compute the normalized LCSS |
| 165 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthMethod) | 193 ie, the LCSS divided by the min or mean of the indicator lengths (using lengthFunc) |
| 166 lengthMethod = lambda x,y:float(x,y)/2''' | 194 lengthFunc = lambda x,y:float(x,y)/2''' |
| 167 return float(LCSS(l1, l2, similarityFunc, delta))/lengthMethod(len(l1), len(l2)) | 195 return float(LCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2)) |
| 168 | 196 |
| 169 def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthMethod = min): | 197 def DLCSS(l1, l2, similarityFunc, delta = float('inf'), lengthFunc = min): |
| 170 ''' compute the LCSS distance''' | 198 ''' compute the LCSS distance''' |
| 171 return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthMethod) | 199 return 1-normalizedLCSS(l1, l2, similarityFunc, delta, lengthFunc) |
| 172 | 200 |
| 173 def alignedLCSS(_l1, _l2, similarityFunc, delta): | 201 def alignedLCSS(_l1, _l2, similarityFunc, delta): |
| 174 '''returns the best matching if using a finite delta by shiftinig the series alignments''' | 202 '''returns the best matching if using a finite delta by shiftinig the series alignments''' |
| 175 if len(_l2) < len(_l1): # l1 is the shortest | 203 if len(_l2) < len(_l1): # l1 is the shortest |
| 176 l1 = _l2 | 204 l1 = _l2 |
| 185 # print l2[max(0,i-n1):] | 213 # print l2[max(0,i-n1):] |
| 186 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) | 214 # print LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) |
| 187 lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))] | 215 lcss = [LCSS(l1[min(-i-1,n1):], l2[max(0,i-n1):], similarityFunc, delta) for i in xrange(min(delta,n1), max(n1+n2-delta, n2+1))] |
| 188 return max(lcss) | 216 return max(lcss) |
| 189 | 217 |
| 190 def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthMethod = min): | 218 def normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc = min): |
| 191 return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthMethod(len(l1), len(l2)) | 219 return float(alignedLCSS(l1, l2, similarityFunc, delta))/lengthFunc(len(l1), len(l2)) |
| 192 | 220 |
| 193 def alignedDLCSS(l1, l2, similarityFunc, delta, lengthMethod = min): | 221 def alignedDLCSS(l1, l2, similarityFunc, delta, lengthFunc = min): |
| 194 return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthMethod) | 222 return 1-normalizedAlignedLCSS(l1, l2, similarityFunc, delta, lengthFunc) |
| 195 | 223 |
| 196 ######################### | 224 ######################### |
| 197 # maths section | 225 # maths section |
| 198 ######################### | 226 ######################### |
| 199 | 227 |
