Mercurial > hg > nsaunier > traffic-intelligence
comparison python/utils.py @ 758:0a05883216cf
merge with dev
| author | Nicolas Saunier <nicolas.saunier@polymtl.ca> |
|---|---|
| date | Tue, 03 Nov 2015 13:48:56 -0500 |
| parents | 10dbab1e871d |
| children | e01cabca4c55 |
comparison
equal
deleted
inserted
replaced
| 748:d99866b0528a | 758:0a05883216cf |
|---|---|
| 72 result = 0. | 72 result = 0. |
| 73 for e, o in zip(expected, observed): | 73 for e, o in zip(expected, observed): |
| 74 result += ((e-o)*(e-o))/e | 74 result += ((e-o)*(e-o))/e |
| 75 return result | 75 return result |
| 76 | 76 |
| 77 class EmpiricalDistribution(object): | 77 class DistributionSample(object): |
| 78 def nSamples(self): | 78 def nSamples(self): |
| 79 return sum(self.counts) | 79 return sum(self.counts) |
| 80 | 80 |
| 81 def cumulativeDensityFunction(sample, normalized = False): | 81 def cumulativeDensityFunction(sample, normalized = False): |
| 82 '''Returns the cumulative density function of the sample of a random variable''' | 82 '''Returns the cumulative density function of the sample of a random variable''' |
| 84 counts = arange(1,len(sample)+1) # dtype = float | 84 counts = arange(1,len(sample)+1) # dtype = float |
| 85 if normalized: | 85 if normalized: |
| 86 counts /= float(len(sample)) | 86 counts /= float(len(sample)) |
| 87 return xaxis, counts | 87 return xaxis, counts |
| 88 | 88 |
| 89 class EmpiricalDiscreteDistribution(EmpiricalDistribution): | 89 class DiscreteDistributionSample(DistributionSample): |
| 90 '''Class to represent a sample of a distribution for a discrete random variable | 90 '''Class to represent a sample of a distribution for a discrete random variable''' |
| 91 ''' | |
| 92 def __init__(self, categories, counts): | 91 def __init__(self, categories, counts): |
| 93 self.categories = categories | 92 self.categories = categories |
| 94 self.counts = counts | 93 self.counts = counts |
| 95 | 94 |
| 96 def mean(self): | 95 def mean(self): |
| 111 refProba = [probability(c) for c in self.categories] | 110 refProba = [probability(c) for c in self.categories] |
| 112 refProba[-1] = 1-npsum(refProba[:-1]) | 111 refProba[-1] = 1-npsum(refProba[:-1]) |
| 113 refCounts = [r*self.nSamples() for r in refProba] | 112 refCounts = [r*self.nSamples() for r in refProba] |
| 114 return refCounts, refProba | 113 return refCounts, refProba |
| 115 | 114 |
| 116 class EmpiricalContinuousDistribution(EmpiricalDistribution): | 115 class ContinuousDistributionSample(DistributionSample): |
| 117 '''Class to represent a sample of a distribution for a continuous random variable | 116 '''Class to represent a sample of a distribution for a continuous random variable |
| 118 with the number of observations for each interval | 117 with the number of observations for each interval |
| 119 intervals (categories variable) are defined by their left limits, the last one being the right limit | 118 intervals (categories variable) are defined by their left limits, the last one being the right limit |
| 120 categories contain therefore one more element than the counts''' | 119 categories contain therefore one more element than the counts''' |
| 121 def __init__(self, categories, counts): | 120 def __init__(self, categories, counts): |
| 122 # todo add samples for initialization and everything to None? (or setSamples?) | 121 # todo add samples for initialization and everything to None? (or setSamples?) |
| 123 self.categories = categories | 122 self.categories = categories |
| 124 self.counts = counts | 123 self.counts = counts |
| 124 | |
| 125 @staticmethod | |
| 126 def generate(sample, categories): | |
| 127 if min(sample) < min(categories): | |
| 128 print('Sample has lower min than proposed categories ({}, {})'.format(min(sample), min(categories))) | |
| 129 if max(sample) > max(categories): | |
| 130 print('Sample has higher max than proposed categories ({}, {})'.format(max(sample), max(categories))) | |
| 131 dist = ContinuousDistributionSample(sorted(categories), [0]*(len(categories)-1)) | |
| 132 for s in sample: | |
| 133 i = 0 | |
| 134 while i<len(dist.categories) and dist.categories[i] <= s: | |
| 135 i += 1 | |
| 136 if i <= len(dist.counts): | |
| 137 dist.counts[i-1] += 1 | |
| 138 #print('{} in {} {}'.format(s, dist.categories[i-1], dist.categories[i])) | |
| 139 else: | |
| 140 print('Element {} is not in the categories'.format(s)) | |
| 141 return dist | |
| 125 | 142 |
| 126 def mean(self): | 143 def mean(self): |
| 127 result = 0. | 144 result = 0. |
| 128 for i in range(len(self.counts)-1): | 145 for i in range(len(self.counts)-1): |
| 129 result += self.counts[i]*(self.categories[i]+self.categories[i+1])/2 | 146 result += self.counts[i]*(self.categories[i]+self.categories[i+1])/2 |
