Mercurial > hg > nsaunier > traffic-intelligence
comparison python/utils.py @ 670:f72ed51c6b65
corrected other missing imports
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Tue, 26 May 2015 11:39:36 +0200 |
| parents | df6be882f325 |
| children | 849f5f8bf4b9 |
comparison
equal
deleted
inserted
replaced
| 669:df6be882f325 | 670:f72ed51c6b65 |
|---|---|
| 1 #! /usr/bin/env python | 1 #! /usr/bin/env python |
| 2 ''' Generic utilities.''' | 2 ''' Generic utilities.''' |
| 3 | 3 |
| 4 import matplotlib.pyplot as plt | 4 import matplotlib.pyplot as plt |
| 5 from datetime import time, datetime | 5 from datetime import time, datetime |
| 6 from math import sqrt | 6 from math import sqrt, ceil, floor |
| 7 from scipy.stats import kruskal, shapiro | 7 from scipy.stats import kruskal, shapiro |
| 8 | 8 |
| 9 datetimeFormat = "%Y-%m-%d %H:%M:%S" | 9 datetimeFormat = "%Y-%m-%d %H:%M:%S" |
| 10 | 10 |
| 11 ######################### | 11 ######################### |
| 35 | 35 |
| 36 Use otherwise t.interval or norm.interval | 36 Use otherwise t.interval or norm.interval |
| 37 ex: norm.interval(0.95, loc = 0., scale = 2.3/sqrt(11)) | 37 ex: norm.interval(0.95, loc = 0., scale = 2.3/sqrt(11)) |
| 38 t.interval(0.95, 10, loc=1.2, scale = 2.3/sqrt(nSamples)) | 38 t.interval(0.95, 10, loc=1.2, scale = 2.3/sqrt(nSamples)) |
| 39 loc is mean, scale is sigma/sqrt(n) (for Student, 10 is df)''' | 39 loc is mean, scale is sigma/sqrt(n) (for Student, 10 is df)''' |
| 40 from math import sqrt | |
| 41 from scipy.stats.distributions import norm, t | 40 from scipy.stats.distributions import norm, t |
| 42 if trueStd: | 41 if trueStd: |
| 43 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200. | 42 k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200. |
| 44 else: # use Student | 43 else: # use Student |
| 45 k = round(t.ppf(0.5+percentConfidence/200., nSamples-1)*100)/100. | 44 k = round(t.ppf(0.5+percentConfidence/200., nSamples-1)*100)/100. |
| 209 return max(d, key=d.get) | 208 return max(d, key=d.get) |
| 210 | 209 |
| 211 def framesToTime(nFrames, frameRate, initialTime = time()): | 210 def framesToTime(nFrames, frameRate, initialTime = time()): |
| 212 '''returns a datetime.time for the time in hour, minutes and seconds | 211 '''returns a datetime.time for the time in hour, minutes and seconds |
| 213 initialTime is a datetime.time''' | 212 initialTime is a datetime.time''' |
| 214 from math import floor | |
| 215 seconds = int(floor(float(nFrames)/float(frameRate))+initialTime.hour*3600+initialTime.minute*60+initialTime.second) | 213 seconds = int(floor(float(nFrames)/float(frameRate))+initialTime.hour*3600+initialTime.minute*60+initialTime.second) |
| 216 h = int(floor(seconds/3600.)) | 214 h = int(floor(seconds/3600.)) |
| 217 seconds = seconds - h*3600 | 215 seconds = seconds - h*3600 |
| 218 m = int(floor(seconds/60)) | 216 m = int(floor(seconds/60)) |
| 219 seconds = seconds - m*60 | 217 seconds = seconds - m*60 |
| 231 return xsorted, [D[x] for x in xsorted] | 229 return xsorted, [D[x] for x in xsorted] |
| 232 | 230 |
| 233 def ceilDecimals(v, nDecimals): | 231 def ceilDecimals(v, nDecimals): |
| 234 '''Rounds the number at the nth decimal | 232 '''Rounds the number at the nth decimal |
| 235 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3''' | 233 eg 1.23 at 0 decimal is 2, at 1 decimal is 1.3''' |
| 236 from math import ceil,pow | 234 tens = 10**nDecimals |
| 237 tens = pow(10,nDecimals) | |
| 238 return ceil(v*tens)/tens | 235 return ceil(v*tens)/tens |
| 239 | 236 |
| 240 def inBetween(bound1, bound2, x): | 237 def inBetween(bound1, bound2, x): |
| 241 return bound1 <= x <= bound2 or bound2 <= x <= bound1 | 238 return bound1 <= x <= bound2 or bound2 <= x <= bound1 |
| 242 | 239 |
| 421 experiments.loc[i,'nobs'] = int(results.nobs) | 418 experiments.loc[i,'nobs'] = int(results.nobs) |
| 422 return experiments | 419 return experiments |
| 423 | 420 |
| 424 def generateExperiments(independentVariables): | 421 def generateExperiments(independentVariables): |
| 425 '''Generates all possible models for including or not each independent variable''' | 422 '''Generates all possible models for including or not each independent variable''' |
| | 423 from numpy import nan |
| | 424 from pandas import DataFrame |
| 426 experiments = {} | 425 experiments = {} |
| 427 nIndependentVariables = len(independentVariables) | 426 nIndependentVariables = len(independentVariables) |
| 428 if nIndependentVariables != len(set(independentVariables)): | 427 if nIndependentVariables != len(set(independentVariables)): |
| 429 print("Duplicate variables. Exiting") | 428 print("Duplicate variables. Exiting") |
| 430 import sys | 429 import sys |
| 431 sys.exit() | 430 sys.exit() |
| 432 nModels = 2**nIndependentVariables | 431 nModels = 2**nIndependentVariables |
| 433 for i,var in enumerate(independentVariables): | 432 for i,var in enumerate(independentVariables): |
| 434 pattern = [False]*(2**i)+[True]*(2**i) | 433 pattern = [False]*(2**i)+[True]*(2**i) |
| 435 experiments[var] = pattern*(2**(nIndependentVariables-i-1)) | 434 experiments[var] = pattern*(2**(nIndependentVariables-i-1)) |
| 436 experiments = pd.DataFrame(experiments) | 435 experiments = DataFrame(experiments) |
| 437 experiments['r2adj'] = 0. | 436 experiments['r2adj'] = 0. |
| 438 experiments['condNum'] = np.nan | 437 experiments['condNum'] = nan |
| 439 experiments['shapiroP'] = -1 | 438 experiments['shapiroP'] = -1 |
| 440 experiments['nobs'] = -1 | 439 experiments['nobs'] = -1 |
| 441 return experiments | 440 return experiments |
| 442 | 441 |
| 443 def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1): | 442 def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1): |
| 444 '''Generates all possible model with the independentVariables | 443 '''Generates all possible model with the independentVariables |
| 445 and runs them, saving the results in experiments | 444 and runs them, saving the results in experiments |
| 446 with multiprocess option''' | 445 with multiprocess option''' |
| | 446 from pandas import concat |
| 447 experiments = generateExperiments(independentVariables) | 447 experiments = generateExperiments(independentVariables) |
| 448 nModels = len(experiments) | 448 nModels = len(experiments) |
| 449 print("Running {} models with {} processes".format(nModels, nProcesses)) | 449 print("Running {} models with {} processes".format(nModels, nProcesses)) |
| 450 if nProcesses == 1: | 450 if nProcesses == 1: |
| 451 return runModels(experiments, data, dependentVariable, independentVariables, regressionType) | 451 return runModels(experiments, data, dependentVariable, independentVariables, regressionType) |
| 452 else: | 452 else: |
| 453 pool = Pool(processes = nProcesses) | 453 pool = Pool(processes = nProcesses) |
| 454 chunkSize = int(np.ceil(nModels/nProcesses)) | 454 chunkSize = int(ceil(nModels/nProcesses)) |
| 455 jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)] | 455 jobs = [pool.apply_async(runModels, args = (experiments[i*chunkSize:(i+1)*chunkSize], data, dependentVariable, independentVariables, regressionType)) for i in range(nProcesses)] |
| 456 return pd.concat([job.get() for job in jobs]) | 456 return concat([job.get() for job in jobs]) |
| 457 | 457 |
| 458 def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None): | 458 def findBestModelFwd(data, dependentVariable, independentVariables, modelFunc, experiments = None): |
| 459 '''Forward search for best model (based on adjusted R2) | 459 '''Forward search for best model (based on adjusted R2) |
| 460 Randomly starting with one variable and adding randomly variables | 460 Randomly starting with one variable and adding randomly variables |
| 461 if they improve the model | 461 if they improve the model |
| 462 | 462 |
| 463 The results are added to experiments if provided as argument | 463 The results are added to experiments if provided as argument |
| 464 Storing in experiment relies on the index being the number equal | 464 Storing in experiment relies on the index being the number equal |
| 465 to the binary code derived from the independent variables''' | 465 to the binary code derived from the independent variables''' |
| | 466 from numpy.random import permutation as nppermutation |
| 466 if experiments is None: | 467 if experiments is None: |
| 467 experiments = generateExperiments(independentVariables) | 468 experiments = generateExperiments(independentVariables) |
| 468 nIndependentVariables = len(independentVariables) | 469 nIndependentVariables = len(independentVariables) |
| 469 permutation = np.random.permutation(range(nIndependentVariables)).tolist() | 470 permutation = nppermutation(range(nIndependentVariables)).tolist() |
| 470 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)} | 471 variableMapping = {j: independentVariables[i] for i,j in enumerate(permutation)} |
| 471 print('Tested variables '+', '.join([variableMapping[i] for i in xrange(nIndependentVariables)])) | 472 print('Tested variables '+', '.join([variableMapping[i] for i in xrange(nIndependentVariables)])) |
| 472 bestModel = [False]*nIndependentVariables | 473 bestModel = [False]*nIndependentVariables |
| 473 currentVarNum = 0 | 474 currentVarNum = 0 |
| 474 currentR2Adj = 0. | 475 currentR2Adj = 0. |
