Mercurial > hg > nsaunier > traffic-intelligence
comparison python/utils.py @ 76:64fde2b1f96d
simplified intervals in empiricalDistribution
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Thu, 10 Feb 2011 22:15:54 -0500 |
| parents | 46ec876ce90e |
| children | 5e6cd36a991c |
comparison
equal
deleted
inserted
replaced
| 75:46ec876ce90e | 76:64fde2b1f96d |
|---|---|
| 23 result += ((e-o)*(e-o))/e | 23 result += ((e-o)*(e-o))/e |
| 24 return result | 24 return result |
| 25 | 25 |
| 26 class empiricalDistribution: | 26 class empiricalDistribution: |
| 27 '''Class to represent a sample of a distribution for a continuous random variable | 27 '''Class to represent a sample of a distribution for a continuous random variable |
| 28 with the number of observations for each interval''' | 28 with the number of observations for each interval |
| | 29 intervals (categories variable) are defined by their left limits, the last one being the right limit |
| | 30 categories contain therefore one more element than the counts''' |
| 29 def __init__(self, categories, counts): | 31 def __init__(self, categories, counts): |
| 30 self.categories = categories | 32 self.categories = categories |
| 31 self.counts = counts | 33 self.counts = counts |
| 32 | 34 |
| 33 def mean(self): | 35 def mean(self): |
| 34 result = 0. | 36 result = 0. |
| 35 for i,c in zip(self.categories, self.counts): | 37 for i in range(len(self.counts)-1): |
| 36 result += c*(i[1]+i[0])/2 | 38 result += self.counts[i]*(self.categories[i]+self.categories[i+1])/2 |
| 37 return result/sum(self.counts) | 39 return result/self.nSamples() |
| 38 | 40 |
| 39 def var(self, mean = None): | 41 def var(self, mean = None): |
| 40 if not mean: | 42 if not mean: |
| 41 m = self.mean() | 43 m = self.mean() |
| 42 else: | 44 else: |
| 43 m = mean | 45 m = mean |
| 44 result = 0. | 46 result = 0. |
| 45 for i,c in zip(self.categories, self.counts): | 47 for i in range(len(self.counts)-1): |
| 46 mid = (i[1]+i[0])/2 | 48 mid = (self.categories[i]+self.categories[i+1])/2 |
| 47 result += c*(mid - m)*(mid - m) | 49 result += self.counts[i]*(mid - m)*(mid - m) |
| 48 return result/(self.nSamples()-1) | 50 return result/(self.nSamples()-1) |
| 49 | 51 |
| 50 def nSamples(self): | 52 def nSamples(self): |
| 51 return sum(self.counts) | 53 return sum(self.counts) |
| 52 | 54 |
| 54 '''cdf is a cumulative distribution function | 56 '''cdf is a cumulative distribution function |
| 55 returning the probability of the variable being less that x''' | 57 returning the probability of the variable being less that x''' |
| 56 # refCumulativeCounts = [0]#[cdf(self.categories[0][0])] | 58 # refCumulativeCounts = [0]#[cdf(self.categories[0][0])] |
| 57 # for inter in self.categories: | 59 # for inter in self.categories: |
| 58 # refCumulativeCounts.append(cdf(inter[1])) | 60 # refCumulativeCounts.append(cdf(inter[1])) |
| 59 refCumulativeCounts = [cdf(inter[1]) for inter in self.categories[:-1]] | 61 refCumulativeCounts = [cdf(x) for x in self.categories[1:-1]] |
| 60 | 62 |
| 61 refProba = [refCumulativeCounts[0]] | 63 refProba = [refCumulativeCounts[0]] |
| 62 for i in xrange(1,len(refCumulativeCounts)): | 64 for i in xrange(1,len(refCumulativeCounts)): |
| 63 refProba.append(refCumulativeCounts[i]-refCumulativeCounts[i-1]) | 65 refProba.append(refCumulativeCounts[i]-refCumulativeCounts[i-1]) |
| 64 refProba.append(1-refCumulativeCounts[-1]) | 66 refProba.append(1-refCumulativeCounts[-1]) |
