# HG changeset patch # User Nicolas Saunier # Date 1445313805 14400 # Node ID 10dbab1e871dd132b6e08c037f930ed3932106c5 # Parent d45ab817ee11423eec268d13ea206105d5015949 modifications in samples and distributions diff -r d45ab817ee11 -r 10dbab1e871d python/utils.py --- a/python/utils.py Fri Oct 02 11:29:43 2015 -0400 +++ b/python/utils.py Tue Oct 20 00:03:25 2015 -0400 @@ -74,7 +74,7 @@ result += ((e-o)*(e-o))/e return result -class EmpiricalDistribution(object): +class DistributionSample(object): def nSamples(self): return sum(self.counts) @@ -86,9 +86,8 @@ counts /= float(len(sample)) return xaxis, counts -class EmpiricalDiscreteDistribution(EmpiricalDistribution): - '''Class to represent a sample of a distribution for a discrete random variable - ''' +class DiscreteDistributionSample(DistributionSample): + '''Class to represent a sample of a distribution for a discrete random variable''' def __init__(self, categories, counts): self.categories = categories self.counts = counts @@ -113,7 +112,7 @@ refCounts = [r*self.nSamples() for r in refProba] return refCounts, refProba -class EmpiricalContinuousDistribution(EmpiricalDistribution): +class ContinuousDistributionSample(DistributionSample): '''Class to represent a sample of a distribution for a continuous random variable with the number of observations for each interval intervals (categories variable) are defined by their left limits, the last one being the right limit @@ -123,6 +122,24 @@ self.categories = categories self.counts = counts + @staticmethod + def generate(sample, categories): + if min(sample) < min(categories): + print('Sample has lower min than proposed categories ({}, {})'.format(min(sample), min(categories))) + if max(sample) > max(categories): + print('Sample has higher max than proposed categories ({}, {})'.format(max(sample), max(categories))) + dist = ContinuousDistributionSample(sorted(categories), [0]*(len(categories)-1)) + for s in sample: + i = 0 + while i