#!/usr/local/bin/pythonw
#coding: iso-8859-1
# Copyright © 2004 by Amos Newcombe.
# 
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along with this program.  If not, see <http://www.gnu.org/licenses/>.

'''moment-based statistics
'''

from math import sqrt
from operator import add, mul
from Statistics import Statistics

class Moments(Statistics):
	'''Collect statistics based on the moments (sums of powers) of the data: most notably mean and variance.
	
	The statistics are returned in self['moments']: a list of length floor(cOrder)+1. if M = self['moments'], and X = self.getData(), then
	M[0] = n, the number of data points
	M[1] = E(X) = m, the mean
	M[2] = E((X-m)^2) = s^2: the variance
	M[3] = E((X-m)^3) / s^3 = E(Z^3) for Z = (X-m)/s: the skewness
	M[4] = E((X-m)^4) / s^4 = E(Z^4): the kurtosis + 3
	M[i] = E((X-m)^i) / s^i = E(Z^i), for i = 5, ..., floor(cOrder)
	
	The cOrder parameter is passed to __init__, and it can be any real or floating point number. It defaults to 2.

	There are two version of variance. One has a denominator of n in its definition, and is used when the data constitutes a finite population, or when the data is a sample from an infinite population whose mean is known. This variance is available as self['moments'][2]. The second version has a denominator of n-1 in its definition, and is used when the data is a sample from a population whose mean is estimated from the same data. This variance, more widely used in practice, is available as self['variance'], and its square root as self['std_dev'], an unbiased estimate of the population standard deviation.
	
	There are two versions of the kurtosis. The first is calculated from the moments, and represents the order-4 moment: E(Z^4). These moments are (estimates of) the coefficients in the series expansion of the underlying distribution's characteristic function. There is another set of series expansion coefficients, those of the log of the characteristic functions, called the cumulants. These are the mean, standard deviation, skewness, kurtosis proper, .... The skewness is equal to the moment of order 3, and the kurtosis is the order-4 moment minus 3.

	[I may also want to store the raw sums of squares, and calculate moments and cumulants from them. That way I can combine two separate Moments instances that I want to treat as if they were samples from the same population.]
	'''
	
	def __init__(self, lxData=None, cOrder=2):
		Statistics.__init__(self, lxData, cOrder=cOrder)
		self['order'] = cOrder
	
	def Calculate(self):
		lxData = self.getData()
		cOrder = self['order']
		n = len(lxData)
		self['n'] = n
		self['moment'] = [n]
		if cOrder < 1: return self
		
		xMean = reduce(add, lxData) / n
		self['moment'].append(xMean)
		self['mean'] = xMean
		if cOrder < 2: return self

		lx = self.Offset()             # mean(lx) == 0
		lxT = map(mul, lx, lx)         # = (lx)^2
		xSS = reduce(add, lxT)         # sum of squares main term
		xCorr = reduce(add, lx)        # correction for roundoff: calculate it ...
		xSS -= xCorr*xCorr/n           # ... and use it
		xVar, xVar1 = xSS/n, xSS/(n-1) # the two versions of variance
		self['moment'].append(xVar)    # use the first for the list of moments
		self['variance'] = xVar1       # and the second for the named fields
		self['std_dev']  = sqrt(xVar1)
		self['r_o_corr'] = xCorr       # might want to look at this sometime
		xSD = sqrt(xVar)
		if cOrder < 3: return self

		lxT = map(mul, lxT, lx)        # = (lx)^3
		xSkew = reduce(add, lxT) / n / (xVar * xSD)
		self['moment'].append(xSkew)
		self['skew'] = xSkew
		if cOrder < 4: return self

		lxT = map(mul, lxT, lx)        # = (lx)^4
		xKurt = reduce(add, lxT) / n / (xVar * xVar)
		self['moment'].append(xKurt)
		self['kurtosis'] = xKurt - 3.
		if cOrder < 5: return self
		
		iOrder = 5
		xDenominator = n * xVar * xVar
		while iOrder <= cOrder:
			lxT = map(mul, lxT, lx)    # = (lx)^iOrder
			xDenominator *= xSD
			xMoment = reduce(add, lxT) / xDenominator
			self['moment'].append(xMoment)
			iOrder += 1
		
		return self
	
	def Standardize(self, xOffset=None, xScale=None):
		if xOffset == None: xOffset = self['mean']
		if xScale  == None: xScale  = self['std_dev']
		return [(x-xOffset)/xScale for x in self.getData()]
	
	def Offset(self, xOffset=None):
		if xOffset == None: xOffset = self['mean']
		return [x-xOffset for x in self.getData()]

if __name__ == '__main__':
	
	from random import gauss
	from StatTest import Test
	
	Test(Moments([gauss(0,1) for n in range(128)], 4), globals(), locals())
