00001
00002
00003 """
00004 Created on Wed Feb 9 19:13:28 2011
00005
00006 @ author: Sat Kumar Tomer
00007 @ author's webpage: http://civil.iisc.ernet.in/~satkumar/
00008 @ author's email id: satkumartomer@gmail.com
00009 @ author's website: www.ambhas.com
00010
00011 """
00012 from __future__ import division
00013 from scipy.stats import kendalltau, pearsonr, spearmanr
00014 import numpy as np
00015 from scipy.integrate import quad
00016 from scipy.optimize import fmin
00017 import sys
00018 import statistics as st
00019 from scipy.interpolate import interp1d
00020 from stats import scoreatpercentile
00021
class Copula(object):
    """
    Estimate the parameter of a bivariate copula from two samples and
    generate joint random variables for that parameter.

    The following three copula families are available:
        clayton
        frank
        gumbel

    Example:
        x = np.random.normal(size=100)
        y = np.random.normal(size=100)
        foo = Copula(x, y, 'frank')
        u, v = foo.generate_uv(100)
    """

    # names accepted for the ``family`` argument of the constructor
    _FAMILIES = ('clayton', 'frank', 'gumbel')

    def __init__(self, X, Y, family):
        """ initialise the class with X and Y
        Input:
            X:        one dimensional numpy array
            Y:        one dimensional numpy array
            family:   clayton or frank or gumbel

        Raises:
            ValueError: if X or Y is not one dimensional, if their sizes
                        differ, or if family is not a supported name

        Note: the size of X and Y should be same
        """
        # accept any array-like; an ndarray is passed through unchanged
        X = np.asarray(X)
        Y = np.asarray(Y)

        if not ((X.ndim == 1) and (Y.ndim == 1)):
            raise ValueError('The dimension of array should be one.')

        # sizes are compared by value; an identity test (``is not``) on
        # ints is only reliable for small cached integers and rejected
        # equally sized arrays with more than 256 elements
        if X.size != Y.size:
            raise ValueError('The size of both array should be same.')

        if family not in self._FAMILIES:
            raise ValueError('The family should be clayton or frank or gumbel')

        self.X = X
        self.Y = Y
        self.family = family

        # rank and linear correlation measures of the input sample
        self.tau = kendalltau(self.X, self.Y)[0]
        self.pr = pearsonr(self.X, self.Y)[0]
        self.sr = spearmanr(self.X, self.Y)[0]

        # copula parameter (theta) derived from Kendall's tau
        self._get_parameter()

        # generated ensembles; filled by generate_uv() / generate_xy()
        self.U = None
        self.V = None
        self.X1 = None
        self.Y1 = None

    def _get_parameter(self):
        """ estimate the parameter (theta) of copula from Kendall's tau

        clayton: theta = 2*tau/(1 - tau)
        frank:   theta is found by minimising _frank_fun numerically
        gumbel:  theta = 1/(1 - tau)
        """
        if self.family == 'clayton':
            self.theta = 2*self.tau/(1 - self.tau)

        elif self.family == 'frank':
            # _frank_fun is written in terms of alpha = -theta, hence the
            # starting point -5 and the sign flip of the optimum
            self.theta = -fmin(self._frank_fun, -5, disp=False)[0]

        elif self.family == 'gumbel':
            self.theta = 1/(1 - self.tau)

    def generate_uv(self, n=1000):
        """
        Generate random variables (u,v)

        Input:
            n:        number of random copula to be generated

        Output:
            U and V:  generated copula

        Raises:
            ValueError: if theta is outside the range accepted for the
                        selected family

        The generated arrays are also stored as self.U and self.V.
        """
        samplers = {
            'clayton': self._sample_clayton,
            'frank': self._sample_frank,
            'gumbel': self._sample_gumbel,
        }
        U, V = samplers[self.family](n)

        self.U = U
        self.V = V
        return U, V

    def _sample_clayton(self, n):
        """ draw n pairs (U, V) from the clayton copula with self.theta """
        theta = self.theta
        if theta <= -1:
            raise ValueError('the parameter for clayton copula should be more than -1')
        elif theta == 0:
            raise ValueError('The parameter for clayton copula should not be 0')

        U = np.random.uniform(size=n)
        W = np.random.uniform(size=n)

        # Only a theta that is numerically zero is treated as independence.
        # Testing ``theta < epsilon`` sent every negative theta down this
        # shortcut, so negatively dependent data produced independent
        # samples.
        if abs(theta) < sys.float_info.epsilon:
            V = W
        else:
            # inversion of the conditional distribution of V given U
            V = U*(W**(-theta/(1 + theta)) - 1 + U**theta)**(-1/theta)
        return U, V

    def _sample_frank(self, n):
        """ draw n pairs (U, V) from the frank copula with self.theta """
        theta = self.theta
        if theta == 0:
            raise ValueError('The parameter for frank copula should not be 0')

        U = np.random.uniform(size=n)
        W = np.random.uniform(size=n)

        if abs(theta) > np.log(sys.float_info.max):
            # exp(-theta) is not representable: use the limiting case,
            # V = U for positive theta and V = 1 - U for negative theta.
            # The offset must depend on the sign of theta; ``U < 0`` is
            # never true for uniform draws and gave V = -U.
            V = (theta < 0) + np.sign(theta)*U
        elif abs(theta) > np.sqrt(sys.float_info.epsilon):
            # inversion of the conditional distribution of V given U
            ratio = np.exp(-theta*U)*(1 - W)/W
            V = -np.log((ratio + np.exp(-theta))/(1 + ratio))/theta
        else:
            # theta is numerically zero: independent draws
            V = W
        return U, V

    def _sample_gumbel(self, n):
        """ draw n pairs (U, V) from the gumbel copula with self.theta """
        theta = self.theta
        if theta <= 1:
            raise ValueError('the parameter for GUMBEL copula should be greater than 1')

        # NOTE(review): with theta <= 1 rejected above, no float satisfies
        # 1 < theta < 1 + epsilon, so this independence branch cannot be
        # reached; it is kept to leave the accepted parameter range as is.
        if theta < 1 + sys.float_info.epsilon:
            U = np.random.uniform(size=n)
            V = np.random.uniform(size=n)
            return U, V

        u = np.random.uniform(size=n)
        w = np.random.uniform(size=n)
        w1 = np.random.uniform(size=n)
        w2 = np.random.uniform(size=n)

        u = (u - 0.5)*np.pi       # uniform on (-pi/2, pi/2)
        u2 = u + np.pi/2
        e = -np.log(w)            # unit exponential variate
        t = np.cos(u - u2/theta)/e
        # gamma is the random variable shared by both margins
        gamma = (np.sin(u2/theta)/t)**(1/theta)*t/np.cos(u)
        s1 = (-np.log(w1))**(1/theta)/gamma
        s2 = (-np.log(w2))**(1/theta)/gamma
        U = np.array(np.exp(-s1))
        V = np.array(np.exp(-s2))
        return U, V

    def generate_xy(self, n=1000):
        """
        Generate random variables (x, y)

        Input:
            n:        number of random copula to be generated; only used
                      when no (U, V) ensemble has been generated yet,
                      otherwise the stored self.U and self.V are reused

        Output:
            X1 and Y1:  generated copula random numbers

        The generated arrays are also stored as self.X1 and self.Y1.
        """
        # an existing (U, V) ensemble is reused rather than redrawn
        if self.U is None:
            self.generate_uv(n)

        # build the interpolators mapping cdf values back to data values
        self._inverse_cdf()

        # transform the copula sample to the scale of X and Y
        X1 = self._inv_cdf_x(self.U)
        Y1 = self._inv_cdf_y(self.V)
        self.X1 = X1
        self.Y1 = Y1

        return X1, Y1

    def estimate(self, data=None):
        """
        this function estimates the mean, std, iqr for the generated
        ensemble

        Input:
            data:     values of X at which the statistics of Y are
                      evaluated; defaults to the X given at construction

        Output:
            Y1_mean = mean of the simulated ensemble
            Y1_std = std of the simulated ensemble
            Y1_ll = lower limit of the simulated ensemble
            Y1_ul = upper limit of the simulated ensemble

        Raises:
            ValueError: if the ensemble holds fewer samples than bins

        Values of data outside the range of the bin means give nan,
        because the interpolators are built with bounds_error=False.
        """
        nbin = 50

        # generate an ensemble only if none is available yet
        if getattr(self, 'X1', None) is None:
            self.generate_xy(10000)

        # generate_xy reuses a stored (U, V) ensemble, so the number of
        # samples is read from the result instead of being assumed
        copula_ens = len(self.X1)

        if data is None:
            data = self.X

        # floor division: the bin width is used as a slice bound and must
        # be an integer even with true division enabled
        n_ens = copula_ens//nbin
        if n_ens < 1:
            raise ValueError('The ensemble should have at least %d samples.' % nbin)

        ind_sort = self.X1.argsort()
        x_mean = np.zeros((nbin,))
        y_mean = np.zeros((nbin,))
        y_ul = np.zeros((nbin,))
        y_ll = np.zeros((nbin,))
        y_std = np.zeros((nbin,))

        for ii in range(nbin):
            # members of the ii-th bin of the ensemble ordered by X1
            members = ind_sort[n_ens*ii:n_ens*(ii + 1)]
            y_bin = self.Y1[members]
            x_mean[ii] = self.X1[members].mean()
            y_mean[ii] = y_bin.mean()
            y_std[ii] = y_bin.std()
            y_ll[ii] = scoreatpercentile(y_bin, 25)
            y_ul[ii] = scoreatpercentile(y_bin, 75)

        foo_mean = interp1d(x_mean, y_mean, bounds_error=False)
        foo_std = interp1d(x_mean, y_std, bounds_error=False)
        foo_ll = interp1d(x_mean, y_ll, bounds_error=False)
        foo_ul = interp1d(x_mean, y_ul, bounds_error=False)

        # evaluate the binned statistics at the requested points
        Y1_mean = foo_mean(data)
        Y1_std = foo_std(data)
        Y1_ll = foo_ll(data)
        Y1_ul = foo_ul(data)

        return Y1_mean, Y1_std, Y1_ll, Y1_ul

    def _inverse_cdf(self):
        """
        This method builds the inverse of the CDF of X and of Y,
        which is used in getting the ensemble of X and Y from
        the ensemble of U and V

        The statistics module is used to estimate the CDF, which uses
        the kernel method of cdf estimation

        To estimate the inverse of CDF, interpolation is used: first the
        cdf is estimated at 100 points, then an interpolation function is
        generated to relate the cdf at those 100 points to the data

        The interpolators are stored as self._inv_cdf_x and
        self._inv_cdf_y.
        """
        # presumably cpdf returns (cdf values, data values) -- verify
        # against the statistics module
        x2, x1 = st.cpdf(self.X, kernel='Epanechnikov', n=100)
        self._inv_cdf_x = interp1d(x2, x1)

        y2, y1 = st.cpdf(self.Y, kernel='Epanechnikov', n=100)
        self._inv_cdf_y = interp1d(y2, y1)

    def _integrand_debye(self, t):
        """
        Integrand t/(exp(t) - 1) for the first order debye function

        The value at t = 0 is the limit 1 instead of 0/0.
        """
        if np.isscalar(t):
            if t == 0:
                return 1.0
            # expm1 keeps precision for small t where exp(t) - 1 cancels
            return t/np.expm1(t)

        t = np.asarray(t, dtype=float)
        at_zero = (t == 0)
        safe_t = np.where(at_zero, 1.0, t)
        return np.where(at_zero, 1.0, safe_t/np.expm1(safe_t))

    def _debye(self, alpha):
        """
        First order Debye function: the integral of _integrand_debye
        from 0 to alpha, divided by alpha

        The value at alpha = 0 is the limit 1.
        """
        if alpha == 0:
            return 1.0
        return quad(self._integrand_debye, 0.0, alpha)[0]/alpha

    def _frank_fun(self, alpha):
        """
        optimization of this function will give the parameter for the
        frank copula

        Input:
            alpha:    minus the frank parameter (theta = -alpha); a
                      scalar or the one element array passed by fmin

        Output:
            squared difference between (1 - tau)/4 and the same quantity
            computed from alpha
        """
        # fmin hands over a 1-element array; work with a plain float so
        # that quad receives scalar limits and a scalar is returned
        alpha = float(np.ravel(alpha)[0])

        if alpha == 0:
            # (debye(-alpha) - 1)/alpha tends to 1/4 as alpha tends to 0
            model = 0.25
        else:
            model = (self._debye(-alpha) - 1)/alpha

        diff = (1 - self.tau)/4.0 - model
        return diff**2
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290