Mercurial > hg > nsaunier > traffic-intelligence
comparison python/utils.py @ 85:7f1e54234f96
added empirical discrete distribution, modified class organization and names
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Mon, 18 Apr 2011 19:31:53 -0400 |
| parents | 5e6cd36a991c |
| children | f03ec4697a09 |
comparison
equal
deleted
inserted
replaced
| 84:731df2fa0010 | 85:7f1e54234f96 |
|---|---|
| 21 result = 0. | 21 result = 0. |
| 22 for e, o in zip(expected, observed): | 22 for e, o in zip(expected, observed): |
| 23 result += ((e-o)*(e-o))/e | 23 result += ((e-o)*(e-o))/e |
| 24 return result | 24 return result |
| 25 | 25 |
| 26 class empiricalDistribution: | 26 class EmpiricalDistribution: |
| 27 def nSamples(self): | |
| 28 return sum(self.counts) | |
| 29 | |
| 30 | |
| 31 class EmpiricalDiscreteDistribution(EmpiricalDistribution): | |
| 32 '''Class to represent a sample of a distribution for a discrete random variable | |
| 33 ''' | |
| 34 def __init__(self, categories, counts): | |
| 35 self.categories = categories | |
| 36 self.counts = counts | |
| 37 | |
| 38 def mean(self): | |
| 39 from numpy.core.fromnumeric import sum | |
| 40 result = [float(x*y) for x,y in zip(self.categories, self.counts)] | |
| 41 return sum(result)/self.nSamples() | |
| 42 | |
| 43 def var(self, mean = None): | |
| 44 from numpy.core.fromnumeric import sum | |
| 45 if not mean: | |
| 46 m = self.mean() | |
| 47 else: | |
| 48 m = mean | |
| 49 result = 0. | |
| 50 squares = [float((x-m)*(x-m)*y) for x,y in zip(self.categories, self.counts)] | |
| 51 return sum(squares)/(self.nSamples()-1) | |
| 52 | |
| 53 | |
| 54 class EmpiricalContinuousDistribution(EmpiricalDistribution): | |
| 27 '''Class to represent a sample of a distribution for a continuous random variable | 55 '''Class to represent a sample of a distribution for a continuous random variable |
| 28 with the number of observations for each interval | 56 with the number of observations for each interval |
| 29 intervals (categories variable) are defined by their left limits, the last one being the right limit | 57 intervals (categories variable) are defined by their left limits, the last one being the right limit |
| 30 categories contain therefore one more element than the counts''' | 58 categories contain therefore one more element than the counts''' |
| 31 def __init__(self, categories, counts): | 59 def __init__(self, categories, counts): |
| 46 result = 0. | 74 result = 0. |
| 47 for i in range(len(self.counts)-1): | 75 for i in range(len(self.counts)-1): |
| 48 mid = (self.categories[i]+self.categories[i+1])/2 | 76 mid = (self.categories[i]+self.categories[i+1])/2 |
| 49 result += self.counts[i]*(mid - m)*(mid - m) | 77 result += self.counts[i]*(mid - m)*(mid - m) |
| 50 return result/(self.nSamples()-1) | 78 return result/(self.nSamples()-1) |
| 51 | |
| 52 def nSamples(self): | |
| 53 return sum(self.counts) | |
| 54 | 79 |
| 55 def referenceCounts(self, cdf): | 80 def referenceCounts(self, cdf): |
| 56 '''cdf is a cumulative distribution function | 81 '''cdf is a cumulative distribution function |
| 57 returning the probability of the variable being less than x''' | 82 returning the probability of the variable being less than x''' |
| 58 # refCumulativeCounts = [0]#[cdf(self.categories[0][0])] | 83 # refCumulativeCounts = [0]#[cdf(self.categories[0][0])] |
