#!/usr/local/bin/pythonw
# coding: iso-8859-1
# Copyright 2009 by Amos Newcombe
# 
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along with this program.  If not, see <http://www.gnu.org/licenses/>.

'''Goodness of fit between a sample of data and a theoretical distribution, using the chi-square test on binned data.

The bins are sized to contain the same number of data points.

KLUGE: This class assumes that self.prob.Distribution() is continuous. Otherwise, we might well expect different numbers of data points in different bins, and this class will calculate incorrectly, with invalid results. A more careful estimation of the theoretical frequencies in each bin would become necessary. In fact a discrete distribution is already binned and shouldn't be binned again. However, some bins may need to be combined to bring the theoretical frequencies above 5.

If a random variable x is distributed according to the Probability instance prob, then prob.Distribution(x) is distributed uniformly in [0, 1].
'''

from Statistics import Statistics
from mathlib.Probability.Chi2 import Chi2
from math import floor, sqrt

class Chi2Fit1(Statistics):
	
	def __init__(self, lxData, prob, cBins):
		Statistics.__init__(self, lxData, prob, cBins)
	
	def Calculate(self):
		prob, cBins = self.tpArgs
		lcBins = [0 for i in range(cBins)]
		self['n'] = len(self.getData())
		for x in self.getData():
			iBin = int(floor(cBins * prob.Distribution(x)))
			if iBin == cBins: iBin -= 1 # The simplest way to stay within the list; rare.
			lcBins[iBin] += 1
		cTheory = self['n'] / cBins
		self['xChi2'] = sum([(c-cTheory)*(c-cTheory)/float(cTheory) for c in lcBins])
# 		self['xChi']  = sqrt(self['xChi2'])
		self['sig']   = 1. - Chi2(cBins-2).Distribution(self['xChi2'])
		# one-tailed test for high values of chi^2
		return self

if __name__ == '__main__':
	
	from mathlib.Probability.Gaussian import Gaussian
	from KSKp1 import KSKp1
	from StatTest import Test
	
	def getStat(prob, cSample, cBins):
		return Chi2Fit1([prob.Sample() for j in range(cSample)], prob, cBins)
	
	prob = Gaussian()
	cSample = 1024
	cBins = 32
	cReps = 32
	Test(getStat(prob, cSample, cBins), globals(), locals())
	print 'Summary significance:', KSKp1([
		getStat(prob, cSample, cBins).Calculate()['sig']
		for i in range(cReps)
	]).Calculate()['sig']