comparison trafficintelligence/utils.py @ 1287:76f5693b530c

updated tests for numpy 2
author Nicolas Saunier <nicolas.saunier@polymtl.ca>
date Sat, 20 Jul 2024 20:35:21 -0400
parents bae8de98406f
children
comparison
equal deleted inserted replaced
1284:8e30c9a6ac6f 1287:76f5693b530c
9 from collections import Counter 9 from collections import Counter
10 10
11 from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t, chi2_contingency 11 from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t, chi2_contingency
12 from scipy.spatial import distance 12 from scipy.spatial import distance
13 from scipy.sparse import dok_matrix 13 from scipy.sparse import dok_matrix
14 from numpy import zeros, array, exp, sum as npsum, int64 as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit, float64 14 from numpy import zeros, array, exp, sum as npsum, int64 as npint, arange, cumsum, mean, median, percentile, ones, convolve, dtype, isnan, nan, ma, isinf, savez, load as npload, log, polyfit, float64
15 from numpy.random import random_sample, permutation as nppermutation 15 from numpy.random import random_sample, permutation as nppermutation
16 from pandas import DataFrame, concat, crosstab 16 from pandas import DataFrame, concat, crosstab
17 import matplotlib.pyplot as plt 17 import matplotlib.pyplot as plt
18 18
19 datetimeFormat = "%Y-%m-%d %H:%M:%S" 19 datetimeFormat = "%Y-%m-%d %H:%M:%S"
499 if var in data.columns and data.dtypes[var] == dtype('O') and len(data[var].unique()) > 2: 499 if var in data.columns and data.dtypes[var] == dtype('O') and len(data[var].unique()) > 2:
500 values = data[var].unique() 500 values = data[var].unique()
501 if not allVariables: 501 if not allVariables:
502 values = values[:-1] 502 values = values[:-1]
503 for val in values: 503 for val in values:
504 if val is not NaN: 504 if not isnan(val):
505 newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','') 505 newVariable = (var+'_{}'.format(val)).replace('.','').replace(' ','').replace('-','')
506 data[newVariable] = (data[var] == val) 506 data[newVariable] = (data[var] == val)
507 newVariables.append(newVariable) 507 newVariables.append(newVariable)
508 return newVariables 508 return newVariables
509 509
734 for i,var in enumerate(independentVariables): 734 for i,var in enumerate(independentVariables):
735 pattern = [False]*(2**i)+[True]*(2**i) 735 pattern = [False]*(2**i)+[True]*(2**i)
736 experiments[var] = pattern*(2**(nIndependentVariables-i-1)) 736 experiments[var] = pattern*(2**(nIndependentVariables-i-1))
737 experiments = DataFrame(experiments) 737 experiments = DataFrame(experiments)
738 experiments['r2adj'] = 0. 738 experiments['r2adj'] = 0.
739 experiments['condNum'] = NaN 739 experiments['condNum'] = nan
740 experiments['shapiroP'] = -1 740 experiments['shapiroP'] = -1
741 experiments['nobs'] = -1 741 experiments['nobs'] = -1
742 return experiments 742 return experiments
743 743
744 def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1): 744 def findBestModel(data, dependentVariable, independentVariables, regressionType = 'ols', nProcesses = 1):