Mercurial > hg > nsaunier > traffic-intelligence
comparison python/utils.py @ 676:58b9ac2f262f
fine tuning
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Wed, 27 May 2015 04:08:19 +0200 |
| parents | ab3fdff42624 |
| children | ae07c7b4cf87 |
comparison
equal
deleted
inserted
replaced
| 675:ab3fdff42624 | 676:58b9ac2f262f |
|---|---|
| 316 newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','') | 316 newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','') |
| 317 data[newVariable] = (data[var] == val) | 317 data[newVariable] = (data[var] == val) |
| 318 newVariables.append(newVariable) | 318 newVariables.append(newVariable) |
| 319 return newVariables | 319 return newVariables |
| 320 | 320 |
| 321 def frenchify(s, displayNames): | 321 def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, translate = lambda s: s, kwCaption = u''): |
| 322 return s | |
| 323 | |
| 324 def kruskalWallis(data, dependentVariable, independentVariable, plotFigure = False, filenamePrefix = None, figureFileType = 'pdf', saveLatex = False, displayNames = {}): | |
| 325 '''Studies the influence of (nominal) independent variable over the dependent variable | 322 '''Studies the influence of (nominal) independent variable over the dependent variable |
| 326 | 323 |
| 327 Makes tests if the conditional distributions are normal | 324 Makes tests if the conditional distributions are normal |
| 328 using the Shapiro-Wilk test (in which case ANOVA could be used) | 325 using the Shapiro-Wilk test (in which case ANOVA could be used) |
| 329 Implements uses the non-parametric Kruskal Wallis test''' | 326 Implements uses the non-parametric Kruskal Wallis test''' |
| 345 plt.xticks(range(1,len(independentVariableValues)+1), independentVariableValues) | 342 plt.xticks(range(1,len(independentVariableValues)+1), independentVariableValues) |
| 346 plt.title('{} vs {}'.format(dependentVariable, independentVariable)) | 343 plt.title('{} vs {}'.format(dependentVariable, independentVariable)) |
| 347 if filenamePrefix is not None: | 344 if filenamePrefix is not None: |
| 348 plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType)) | 345 plt.savefig(filenamePrefix+'-{}-{}.{}'.format(dependentVariable, independentVariable, figureFileType)) |
| 349 table = tmp.groupby([independentVariable])[dependentVariable].describe().unstack().sort(['50%'], ascending = False) | 346 table = tmp.groupby([independentVariable])[dependentVariable].describe().unstack().sort(['50%'], ascending = False) |
| 347 table['count'] = table['count'].astype(int) | |
| 348 #table.index.rename(translate(table.index.name), inplace = True) | |
| 349 testResult = kruskal(*[tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues]) | |
| 350 if saveLatex: | 350 if saveLatex: |
| 351 out.write('\\begin{table}[htp!]\n') | 351 out.write(translate('\\begin{minipage}{\\linewidth}\n' |
| 352 out.write(frenchify(table.to_latex(), displayNames)) | 352 +'\\centering\n' |
| 353 out.write('\caption{Test}\n' | 353 +'\\captionof{table}{'+(kwCaption.format(dependentVariable, independentVariable, *testResult))+'}\n' |
| 354 +'\end{table}[htp!]') | 354 +table.to_latex(float_format = lambda x: '{:.2f}'.format(x)).encode('ascii')+'\n' |
| 355 +'\\end{minipage}\n' | |
| 356 +'\\vspace{0.5cm}\n')) | |
| 355 else: | 357 else: |
| 356 print table | 358 print table |
| 357 return kruskal(*[tmp.loc[tmp[independentVariable] == x, dependentVariable] for x in independentVariableValues]) | 359 return testResult |
| 358 else: | 360 else: |
| 359 return None | 361 return None |
| 360 | 362 |
| 361 def prepareRegression(data, dependentVariable, independentVariables, maxCorrelationThreshold, correlations, maxCorrelationP, correlationFunc): | 363 def prepareRegression(data, dependentVariable, independentVariables, maxCorrelationThreshold, correlations, maxCorrelationP, correlationFunc): |
| 362 '''Removes variables from candidate independent variables if | 364 '''Removes variables from candidate independent variables if |
| 498 if currentR2Adj < experiments.loc[rowIdx, 'r2adj']: | 500 if currentR2Adj < experiments.loc[rowIdx, 'r2adj']: |
| 499 currentR2Adj = experiments.loc[rowIdx, 'r2adj'] | 501 currentR2Adj = experiments.loc[rowIdx, 'r2adj'] |
| 500 bestModel[currentVarNum] = True | 502 bestModel[currentVarNum] = True |
| 501 return experiments | 503 return experiments |
| 502 | 504 |
| 503 def displayModelResults(results, model = None): | 505 def displayModelResults(results, model = None, plotFigures = True, filenamePrefix = None, figureFileType = 'pdf'): |
| 504 import statsmodels.api as sm | 506 import statsmodels.api as sm |
| 505 '''Displays some model results''' | 507 '''Displays some model results''' |
| 506 print results.summary() | 508 print(results.summary()) |
| 507 print('Shapiro-Wilk normality test for residuals: {}'.format(shapiro(results.resid))) | 509 print('Shapiro-Wilk normality test for residuals: {}'.format(shapiro(results.resid))) |
| 508 if model is not None: | 510 if plotFigures: |
| 511 if model is not None: | |
| 512 plt.figure() | |
| 513 plt.plot(results.predict(), model.endog, 'x') | |
| 514 x=plt.xlim() | |
| 515 y=plt.ylim() | |
| 516 plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r') | |
| 517 plt.title('true vs predicted') | |
| 518 if filenamePrefix is not None: | |
| 519 plt.savefig(filenamePrefix+'-true-predicted.'+figureFileType) | |
| 509 plt.figure() | 520 plt.figure() |
| 510 plt.plot(results.predict(), model.endog, 'x') | 521 plt.plot(results.predict(), results.resid, 'x') |
| 511 x=plt.xlim() | 522 if filenamePrefix is not None: |
| 512 y=plt.ylim() | 523 plt.savefig(filenamePrefix+'-residuals.'+figureFileType) |
| 513 plt.plot([max(x[0], y[0]), min(x[1], y[1])], [max(x[0], y[0]), min(x[1], y[1])], 'r') | 524 plt.title('residuals vs predicted') |
| 514 plt.title('true vs predicted') | 525 sm.qqplot(results.resid, fit = True, line = '45') |
| 515 plt.figure() | 526 if filenamePrefix is not None: |
| 516 plt.plot(results.predict(), results.resid, 'x') | 527 plt.savefig(filenamePrefix+'-qq.'+figureFileType) |
| 517 plt.title('residuals vs predicted') | |
| 518 sm.qqplot(results.resid, fit = True, line = '45') | |
| 519 | 528 |
| 520 | 529 |
| 521 ######################### | 530 ######################### |
| 522 # iterable section | 531 # iterable section |
| 523 ######################### | 532 ######################### |
