Mercurial > hg > nsaunier > traffic-intelligence
comparison python/utils.py @ 1012:01db14e947e4
resolved
| author | Wendlasida |
|---|---|
| date | Fri, 01 Jun 2018 10:47:49 -0400 |
| parents | 4f3387a242a1 |
| children | 16932cefabc1 |
comparison
equal
deleted
inserted
replaced
| 1011:4f0312bee393 | 1012:01db14e947e4 |
|---|---|
| 299 return 1 | 299 return 1 |
| 300 | 300 |
| 301 def sortByLength(instances, reverse = False): | 301 def sortByLength(instances, reverse = False): |
| 302 '''Returns a new list with the instances sorted by length (method __len__) | 302 '''Returns a new list with the instances sorted by length (method __len__) |
| 303 reverse is passed to sorted''' | 303 reverse is passed to sorted''' |
| 304 return sorted(instances, cmp = compareLengthForSort, reverse = reverse) | 304 return sorted(instances, key = len, reverse = reverse) |
| 305 | 305 |
| 306 def ceilDecimals(v, nDecimals): | 306 def ceilDecimals(v, nDecimals): |
| 307 '''Rounds the number up (ceiling) at the nth decimal | 307 '''Rounds the number up (ceiling) at the nth decimal |
| 308 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3''' | 308 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3''' |
| 309 tens = 10**nDecimals | 309 tens = 10**nDecimals |
| 404 newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','') | 404 newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','') |
| 405 data[newVariable] = (data[var] == val) | 405 data[newVariable] = (data[var] == val) |
| 406 newVariables.append(newVariable) | 406 newVariables.append(newVariable) |
| 407 return newVariables | 407 return newVariables |
| 408 | 408 |
| 409 def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, renameVariables = lambda s: s, kwCaption = u''): | 409 def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, renameVariables = lambda s: s, kwCaption = ''): |
| 410 '''Studies the influence of (nominal) independent variable over the dependent variable | 410 '''Studies the influence of (nominal) independent variable over the dependent variable |
| 411 | 411 |
| 412 Tests whether the conditional distributions are normal | 412 Tests whether the conditional distributions are normal |
| 413 using the Shapiro-Wilk test (in which case ANOVA could be used) | 413 using the Shapiro-Wilk test (in which case ANOVA could be used) |
| 414 Uses the non-parametric Kruskal-Wallis test''' | 414 Uses the non-parametric Kruskal-Wallis test''' |
| 510 return result | 510 return result |
| 511 | 511 |
| 512 def saveDokMatrix(filename, m, lowerTriangle = False): | 512 def saveDokMatrix(filename, m, lowerTriangle = False): |
| 513 'Saves a dok_matrix using savez' | 513 'Saves a dok_matrix using savez' |
| 514 if lowerTriangle: | 514 if lowerTriangle: |
| 515 keys = [k for k in m.keys() if k[0] > k[1]] | 515 keys = [k for k in m if k[0] > k[1]] |
| 516 savez(filename, shape = m.shape, keys = keys, values = [m[k[0],k[1]] for k in keys]) | 516 savez(filename, shape = m.shape, keys = keys, values = [m[k[0],k[1]] for k in keys]) |
| 517 else: | 517 else: |
| 518 savez(filename, shape = m.shape, keys = m.keys(), values = m.values()) | 518 savez(filename, shape = m.shape, keys = list(m.keys()), values = list(m.values())) |
| 519 | 519 |
| 520 def loadDokMatrix(filename): | 520 def loadDokMatrix(filename): |
| 521 'Loads a dok_matrix saved using the above saveDokMatrix' | 521 'Loads a dok_matrix saved using the above saveDokMatrix' |
| 522 data = npload(filename) | 522 data = npload(filename) |
| 523 m = dok_matrix(tuple(data['shape'])) | 523 m = dok_matrix(tuple(data['shape'])) |
| 610 to the binary code derived from the independent variables''' | 610 to the binary code derived from the independent variables''' |
| 611 from numpy.random import permutation as nppermutation | 611 from numpy.random import permutation as nppermutation |
| 612 if experiments is None: | 612 if experiments is None: |
| 613 experiments = generateExperiments(independentVariables) | 613 experiments = generateExperiments(independentVariables) |
| 614 nIndependentVariables = len(independentVariables) | 614 nIndependentVariables = len(independentVariables) |
| 615 permutation = nppermutation(range(nIndependentVariables)).tolist() | 615 permutation = nppermutation(list(range(nIndependentVariables))) |
| 616 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)} | 616 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)} |
| 617 print('Tested variables '+', '.join([variableMapping[i] for i in xrange(nIndependentVariables)])) | 617 print('Tested variables '+', '.join([variableMapping[i] for i in range(nIndependentVariables)])) |
| 618 bestModel = [False]*nIndependentVariables | 618 bestModel = [False]*nIndependentVariables |
| 619 currentVarNum = 0 | 619 currentVarNum = 0 |
| 620 currentR2Adj = 0. | 620 currentR2Adj = 0. |
| 621 for currentVarNum in xrange(nIndependentVariables): | 621 for currentVarNum in range(nIndependentVariables): |
| 622 currentModel = [i for i in bestModel] | 622 currentModel = [i for i in bestModel] |
| 623 currentModel[currentVarNum] = True | 623 currentModel[currentVarNum] = True |
| 624 rowIdx = sum([0]+[2**i for i in xrange(nIndependentVariables) if currentModel[permutation[i]]]) | 624 rowIdx = sum([0]+[2**i for i in range(nIndependentVariables) if currentModel[permutation[i]]]) |
| 625 #print currentVarNum, sum(currentModel), ', '.join([independentVariables[i] for i in xrange(nIndependentVariables) if currentModel[permutation[i]]]) | 625 #print currentVarNum, sum(currentModel), ', '.join([independentVariables[i] for i in range(nIndependentVariables) if currentModel[permutation[i]]]) |
| 626 if experiments.loc[rowIdx, 'shapiroP'] < 0: | 626 if experiments.loc[rowIdx, 'shapiroP'] < 0: |
| 627 modelStr = modelString(experiments.loc[rowIdx], dependentVariable, independentVariables) | 627 modelStr = modelString(experiments.loc[rowIdx], dependentVariable, independentVariables) |
| 628 model = modelFunc(modelStr, data = data) | 628 model = modelFunc(modelStr, data = data) |
| 629 results = model.fit() | 629 results = model.fit() |
| 630 experiments.loc[rowIdx, 'r2adj'] = results.rsquared_adj | 630 experiments.loc[rowIdx, 'r2adj'] = results.rsquared_adj |
| 750 def similarities(self, l1, l2, jshift=0): | 750 def similarities(self, l1, l2, jshift=0): |
| 751 n1 = len(l1) | 751 n1 = len(l1) |
| 752 n2 = len(l2) | 752 n2 = len(l2) |
| 753 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) | 753 self.similarityTable = zeros((n1+1,n2+1), dtype = npint) |
| 754 if self.similarityFunc is not None: | 754 if self.similarityFunc is not None: |
| 755 for i in xrange(1,n1+1): | 755 for i in range(1,n1+1): |
| 756 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): | 756 for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): |
| 757 if self.similarityFunc(l1[i-1], l2[j-1]): | 757 if self.similarityFunc(l1[i-1], l2[j-1]): |
| 758 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | 758 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 |
| 759 else: | 759 else: |
| 760 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | 760 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) |
| 761 elif self.metric is not None: | 761 elif self.metric is not None: |
| 762 similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon | 762 similarElements = distance.cdist(l1, l2, self.metric) <= self.epsilon |
| 763 for i in xrange(1,n1+1): | 763 for i in range(1,n1+1): |
| 764 for j in xrange(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): | 764 for j in range(max(1,i-jshift-self.delta),min(n2,i-jshift+self.delta)+1): |
| 765 if similarElements[i-1, j-1]: | 765 if similarElements[i-1, j-1]: |
| 766 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 | 766 self.similarityTable[i,j] = self.similarityTable[i-1,j-1]+1 |
| 767 else: | 767 else: |
| 768 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) | 768 self.similarityTable[i,j] = max(self.similarityTable[i-1,j], self.similarityTable[i,j-1]) |
| 769 | 769 |
| 800 n2 = len(l2) | 800 n2 = len(l2) |
| 801 | 801 |
| 802 if self.aligned: | 802 if self.aligned: |
| 803 lcssValues = {} | 803 lcssValues = {} |
| 804 similarityTables = {} | 804 similarityTables = {} |
| 805 for i in xrange(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2 | 805 for i in range(-n2-self.delta+1, n1+self.delta): # interval such that [i-shift-delta, i-shift+delta] is never empty, which happens when i-shift+delta < 1 or when i-shift-delta > n2 |
| 806 self.similarities(l1, l2, i) | 806 self.similarities(l1, l2, i) |
| 807 lcssValues[i] = self.similarityTable.max() | 807 lcssValues[i] = self.similarityTable.max() |
| 808 similarityTables[i] = self.similarityTable | 808 similarityTables[i] = self.similarityTable |
| 809 #print self.similarityTable | 809 #print self.similarityTable |
| 810 alignmentShift = argmaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance | 810 alignmentShift = argmaxDict(lcssValues) # ideally get the medium alignment shift, the one that minimizes distance |
| 892 monochrome = (cycler('color', ['k']) * cycler('linestyle', ['-', '--', ':', '-.'])) | 892 monochrome = (cycler('color', ['k']) * cycler('linestyle', ['-', '--', ':', '-.'])) |
| 893 plt.rc('axes', prop_cycle=monochrome) | 893 plt.rc('axes', prop_cycle=monochrome) |
| 894 | 894 |
| 895 def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1): | 895 def plotIndicatorMap(indicatorMap, squareSize, masked = True, defaultValue=-1): |
| 896 from matplotlib.pyplot import pcolor | 896 from matplotlib.pyplot import pcolor |
| 897 coords = array(indicatorMap.keys()) | 897 coords = array(list(indicatorMap.keys())) |
| 898 minX = min(coords[:,0]) | 898 minX = min(coords[:,0]) |
| 899 minY = min(coords[:,1]) | 899 minY = min(coords[:,1]) |
| 900 X = arange(minX, max(coords[:,0])+1.1)*squareSize | 900 X = arange(minX, max(coords[:,0])+1.1)*squareSize |
| 901 Y = arange(minY, max(coords[:,1])+1.1)*squareSize | 901 Y = arange(minY, max(coords[:,1])+1.1)*squareSize |
| 902 C = defaultValue*ones((len(Y), len(X))) | 902 C = defaultValue*ones((len(Y), len(X))) |
| 903 for k,v in indicatorMap.iteritems(): | 903 for k,v in indicatorMap.items(): |
| 904 C[k[1]-minY,k[0]-minX] = v | 904 C[k[1]-minY,k[0]-minX] = v |
| 905 if masked: | 905 if masked: |
| 906 pcolor(X, Y, ma.masked_where(C==defaultValue,C)) | 906 pcolor(X, Y, ma.masked_where(C==defaultValue,C)) |
| 907 else: | 907 else: |
| 908 pcolor(X, Y, C) | 908 pcolor(X, Y, C) |
| 924 To get hourly data for 2009 and 2012, January, March and October, downloadECWeather(10761, [2009,2012], [1,3,10], '/tmp') | 924 To get hourly data for 2009 and 2012, January, March and October, downloadECWeather(10761, [2009,2012], [1,3,10], '/tmp') |
| 925 | 925 |
| 926 for annee in `seq 2016 2017`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&timeframe=2&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done | 926 for annee in `seq 2016 2017`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&timeframe=2&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done |
| 927 for annee in `seq 2016 2017`;do for mois in `seq 1 12`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&Month=${mois}&timeframe=1&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done;done | 927 for annee in `seq 2016 2017`;do for mois in `seq 1 12`;do wget --content-disposition "http://climat.meteo.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=${annee}&Month=${mois}&timeframe=1&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es" ;done;done |
| 928 ''' | 928 ''' |
| 929 import urllib2 | 929 import urllib.request |
| 930 if english: | 930 if english: |
| 931 language = 'e' | 931 language = 'e' |
| 932 else: | 932 else: |
| 933 language = 'f' | 933 language = 'f' |
| 934 if len(months) == 0: | 934 if len(months) == 0: |
| 937 else: | 937 else: |
| 938 timeFrame = 1 | 938 timeFrame = 1 |
| 939 | 939 |
| 940 for year in years: | 940 for year in years: |
| 941 for month in months: | 941 for month in months: |
| 942 url = urllib2.urlopen('http://climate.weather.gc.ca/climate_data/bulk_data_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit= Download+Data'.format(language, stationID, year, month, timeFrame)) | |
| 943 #http://climat.meteo.gc.ca/climateData/bulkdata_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit=++T%C3%A9l%C3%A9charger+%0D%0Ades+donn%C3%A9es | |
| 944 data = url.read() | |
| 945 outFilename = '{}/{}-{}'.format(outputDirectoryname, stationID, year) | 942 outFilename = '{}/{}-{}'.format(outputDirectoryname, stationID, year) |
| 946 if timeFrame == 1: | 943 if timeFrame == 1: |
| 947 outFilename += '-{}-hourly'.format(month) | 944 outFilename += '-{}-hourly'.format(month) |
| 948 else: | 945 else: |
| 949 outFilename += '-daily' | 946 outFilename += '-daily' |
| 950 outFilename += '.csv' | 947 outFilename += '.csv' |
| 951 out = open(outFilename, 'w') | 948 url = urllib.request.urlretrieve('http://climate.weather.gc.ca/climate_data/bulk_data_{}.html?format=csv&stationID={}&Year={}&Month={}&Day=1&timeframe={}&submit=Download+Data'.format(language, stationID, year, month, timeFrame), outFilename) |
| 952 out.write(data) | |
| 953 out.close() | |
| 954 | 949 |
| 955 ######################### | 950 ######################### |
| 956 # File I/O | 951 # File I/O |
| 957 ######################### | 952 ######################### |
| 958 | 953 |
| 1009 return [float(x) for x in l.split(separator)] | 1004 return [float(x) for x in l.split(separator)] |
| 1010 | 1005 |
| 1011 def line2Ints(l, separator=' '): | 1006 def line2Ints(l, separator=' '): |
| 1012 '''Returns the list of ints corresponding to the string''' | 1007 '''Returns the list of ints corresponding to the string''' |
| 1013 return [int(x) for x in l.split(separator)] | 1008 return [int(x) for x in l.split(separator)] |
| 1014 | |
| 1015 ######################### | |
| 1016 # CLI utils | |
| 1017 ######################### | |
| 1018 | |
| 1019 def parseCLIOptions(helpMessage, options, cliArgs, optionalOptions=[]): | |
| 1020 ''' Simple function to handle similar argument parsing | |
| 1021 Returns the dictionary of options and their values | |
| 1022 | |
| 1023 * cliArgs are most likely directly sys.argv | |
| 1024 (only the elements after the first one are considered) | |
| 1025 | |
| 1026 * options should be a list of strings for getopt options, | |
| 1027 eg ['frame=','correspondences=','video='] | |
| 1028 A value must be provided for each option, or the program quits''' | |
| 1029 import sys, getopt | |
| 1030 from numpy.core.fromnumeric import all | |
| 1031 optionValues, args = getopt.getopt(cliArgs[1:], 'h', ['help']+options+optionalOptions) | |
| 1032 optionValues = dict(optionValues) | |
| 1033 | |
| 1034 if '--help' in optionValues.keys() or '-h' in optionValues.keys(): | |
| 1035 print(helpMessage+ | |
| 1036 '\n - Compulsory options: '+' '.join([opt.replace('=','') for opt in options])+ | |
| 1037 '\n - Non-compulsory options: '+' '.join([opt.replace('=','') for opt in optionalOptions])) | |
| 1038 sys.exit() | |
| 1039 | |
| 1040 missingArgument = [('--'+opt.replace('=','') in optionValues.keys()) for opt in options] | |
| 1041 if not all(missingArgument): | |
| 1042 print('Missing argument') | |
| 1043 print(optionValues) | |
| 1044 sys.exit() | |
| 1045 | |
| 1046 return optionValues | |
| 1047 | |
| 1048 | 1009 |
| 1049 ######################### | 1010 ######################### |
| 1050 # Profiling | 1011 # Profiling |
| 1051 ######################### | 1012 ######################### |
| 1052 | 1013 |
