gitweb.michael.orlitzky.com - dead/census-tools.git/commitdiff
Added the probability package containing the Distribution module.
author Michael Orlitzky <michael@orlitzky.com>
Sat, 15 May 2010 04:03:22 +0000 (00:03 -0400)
committer Michael Orlitzky <michael@orlitzky.com>
Sat, 15 May 2010 04:03:22 +0000 (00:03 -0400)
src/probability/Distribution.py [new file with mode: 0644]
src/probability/__init__.py [new file with mode: 0644]

diff --git a/src/probability/Distribution.py b/src/probability/Distribution.py
new file mode 100644 (file)
index 0000000..3f778a6
--- /dev/null
@@ -0,0 +1,93 @@
+import random
+
+class Distribution(object):
+    """
+    A general class representing a probability distribution.
+    """
+
+    def __init__(self):
+        """
+        components lists the distributions whose samples are added up
+        to sample this one. Right now it is only used for sums.
+        """
+        self.components = []
+
+    def __add__(self, dist2):
+        """
+        Return the sum of this distribution and dist2. The kinds of the
+        operands are unknown here, so the result is a new instance of
+        the most general class, holding both operands' components.
+        """
+        d = Distribution()
+        d.components = self.components + dist2.components
+        return d
+
+    def sample(self):
+        """
+        Sample one value from the distribution: the sum of one sample
+        from each component, or None when there are no components.
+        """
+        if not self.components:
+            return None
+        return sum(component.sample() for component in self.components)
+
+    def cdf(self, x, trials=1000):
+        """
+        Estimate the cumulative distribution function at x. The
+        components are opaque, so we draw `trials` samples and return
+        the fraction that were less than or equal to x.
+
+        Raises ValueError if trials is not positive, or if sample()
+        returns None (an empty distribution has no CDF to estimate).
+        """
+        if trials <= 0:
+            raise ValueError("trials must be a positive integer")
+        lte_count = 0
+        for _ in range(trials):
+            value = self.sample()
+            if value is None:
+                # None <= x is True in Python 2 and an error in Python 3.
+                raise ValueError("cannot sample an empty distribution")
+            if value <= x:
+                lte_count += 1
+        return float(lte_count) / float(trials)
+
+
+
+class Uniform(Distribution):
+    """
+    Represents a uniform probability distribution.
+    """
+
+    def __init__(self, a, b):
+        """
+        Build the uniform distribution between a and b, given in either
+        order. A subclass is its own sole component: a uniform is not,
+        say, the sum of two uniforms (that would no longer be uniform).
+        """
+        super(Uniform, self).__init__()
+        self.components = [self]
+        self.min = float(min(a, b))
+        self.max = float(max(a, b))
+
+    def sample(self):
+        """
+        Sample one value uniformly from between self.min and self.max.
+        """
+        return random.uniform(self.min, self.max)
+
+    def cdf(self, x):
+        """
+        Evaluate the CDF at x exactly; no sampling is needed here.
+        """
+        x = float(x)
+        # Test the upper bound first: when min == max all of the mass
+        # sits on that one point, so the CDF there must be 1.0, not 0.0.
+        if x >= self.max:
+            return 1.0
+        if x <= self.min:
+            return 0.0
+        # x is strictly inside (self.min, self.max) and all points in
+        # between are equally likely, so the answer is how far through
+        # the interval x lies, as a fraction of the interval's width.
+        return (x - self.min) / (self.max - self.min)
diff --git a/src/probability/__init__.py b/src/probability/__init__.py
new file mode 100644 (file)
index 0000000..8076a85
--- /dev/null
@@ -0,0 +1 @@
+# <3 git