# HG changeset patch # User Nicolas Saunier # Date 1529442455 14400 # Node ID 045cb04ad7b842335ba3521217156db1f2a23169 # Parent aafbc0bab925aa245eac872959b20fe528e7d211 corrected bug in distribution diff -r aafbc0bab925 -r 045cb04ad7b8 trafficintelligence/utils.py --- a/trafficintelligence/utils.py Tue Jun 19 10:04:52 2018 -0400 +++ b/trafficintelligence/utils.py Tue Jun 19 17:07:35 2018 -0400 @@ -10,7 +10,7 @@ from scipy.stats import rv_continuous, kruskal, shapiro, lognorm, norm, t from scipy.spatial import distance from scipy.sparse import dok_matrix -from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit +from numpy import zeros, array, exp, sum as npsum, int as npint, arange, cumsum, mean, median, percentile, isnan, ones, convolve, dtype, isnan, NaN, ma, isinf, savez, load as npload, log, polyfit, float as npfloat from numpy.random import permutation as nppermutation from pandas import DataFrame, concat import matplotlib.pyplot as plt @@ -135,11 +135,12 @@ '''Returns the Chi2 statistics''' return sum([((e-o)*(e-o))/float(e) for e, o in zip(expected, observed)]) -class generateDistribution(rv_continuous): - def __init__(self, values, probabilities): +class EmpiricalContinuousDistribution(rv_continuous): + def __init__(self, values, probabilities, **kwargs): '''The values (and corresponding probabilities) are supposed to be sorted by value for v, p in zip(values, probabilities): P(X<=v)=p''' assert probabilities[0]==0 + super(EmpiricalContinuousDistribution, self).__init__(**kwargs) self.values = values self.probabilities = probabilities @@ -153,7 +154,7 @@ if i == len(self.values)-1: return self.probabilities[-1] else: - return (self.probabilities[i+1]-self.probabilities[i])/(self.values[i+1]-self.values[i]) + return self.probabilities[i]+(x-self.values[i])*float(self.probabilities[i+1]-self.probabilities[i])/float(self.values[i+1]-self.values[i]) class DistributionSample(object): def nSamples(self): @@ -164,7 +165,7 @@ xaxis = sorted(sample) counts = arange(1,len(sample)+1) # dtype = float if normalized: - counts /= float(len(sample)) + counts = counts.astype(float)/float(len(sample)) return xaxis, counts class DiscreteDistributionSample(DistributionSample):