1 """!@brief Script by Mathieu Fauvel which performs Gaussian Mixture Model
4 from scipy
import linalg
5 import multiprocessing
as mp
# Helper scored by the multiprocessing pool in GMMR.cross_validation:
# computes, for each candidate regularization value in tau, the percentage
# of fold test samples whose predicted label matches yT.
# NOTE(review): the enclosing `def predict(tau,model,xT,yT)` line and the
# final return are not visible in this extract; the fragment is kept verbatim.
9 err = sp.zeros(tau.size)
10 for j,t
in enumerate(tau):
# model.predict returns a tuple; element [0] is the predicted label vector.
11 yp = model.predict(xT,tau=t)[0]
# Indices where prediction and ground truth agree.
12 eq = sp.where(yp.ravel()==yT.ravel())[0]
# Accuracy in percent (despite the name `err`, larger is better).
13 err[j] = eq.size*100.0/yT.size
19 This class implements the generation of several folds to be used in the cross validation
# --- split_data (fragment) ---
# Splits sample indices 0..n-1 into v random folds without using class
# labels. self.iT receives each fold's test indices; `temp` accumulates the
# complementary indices, presumably appended to self.it just past this
# extract -- TODO confirm, that assignment is not visible here.
26 ''' The function split the data into v folds. Whatever the number of sample per class
28 n : the number of samples
29 v : the number of folds
# Draw a random permutation so each fold is a disjoint random subset.
34 t = sp.random.permutation(n)
# Folds 0..v-2 take `step` consecutive permuted indices each.
38 indices.append(t[i*step:(i+1)*step])
# The last fold absorbs the remainder when n is not divisible by v.
39 indices.append(t[(v-1)*step:n])
42 self.iT.append(sp.asarray(indices[i]))
# Training indices for the current fold: concatenation of all other folds.
45 temp = sp.empty(0,dtype=sp.int64)
47 temp = sp.concatenate((temp,sp.asarray(indices[j])))
# --- split_data_class (fragment) ---
# Stratified variant of split_data: the samples of every class are spread
# (approximately evenly) over the v folds. Classes are assumed to be
# labeled 1..C (see the y==(i+1) test below).
51 ''' The function split the data into v folds. The samples of each class are split approximatly in v folds
53 n : the number of samples
54 v : the number of folds
# Highest label value gives the number of classes (labels start at 1).
59 C = y.max().astype(
'int')
# Indices of the samples belonging to class i+1.
68 t = sp.where(y==(i+1))[0]
# Python 2 print statement: warn when a class has fewer samples than folds.
72 print "Not enough sample to build "+ str(v) +
" folds in class " + str(i)
# Shuffle this class's sample indices before slicing them into folds.
74 tc = t[sp.random.permutation(nc)]
# Test slice for fold j of this class; the alternative bound (next line)
# lets the last fold take the remainder when nc is not divisible by v.
78 start,end = j*stepc,(j+1)*stepc
80 start,end = j*stepc,nc
# tempiT accumulates the current fold's test indices across classes.
81 tempiT.extend(sp.asarray(tc[start:end]))
# The slices of every other fold l != j form the training indices.
86 start,end = l*stepc,(l+1)*stepc
88 start,end = l*stepc,nc
89 tempit.extend(sp.asarray(tc[start:end]))
# Store the per-fold training (it) and test (iT) index lists.
91 self.it.append(tempit)
92 self.iT.append(tempiT)
# --- GMMR.learn (fragment) ---
# Fits one Gaussian per class from (x, y): per-class counts, proportions,
# means and (biased) covariances, plus the eigendecomposition of each
# covariance used later for ridge-regularized inversion.
107 Function that learns the GMM with ridge regularizationb from training samples
109 x : the training samples
112 the mean, covariance and proportion of each class, as well as the spectral decomposition of the covariance matrix
# Machine epsilon -- presumably used as a floor for eigenvalues; the use
# site is outside this extract (TODO confirm).
119 eps = sp.finfo(sp.float64).eps
# Pre-allocate per-class model containers (C classes, d features).
122 self.
ni = sp.empty((C,1))
123 self.
prop = sp.empty((C,1))
124 self.
mean = sp.empty((C,d))
125 self.
cov = sp.empty((C,d,d))
# Q: eigenvectors, L: eigenvalues of each class covariance matrix.
126 self.
Q = sp.empty((C,d,d))
127 self.
L = sp.empty((C,d))
# Per-class statistics (the loop header over c is not visible here).
131 j = sp.where(y==(c+1))[0]
132 self.
ni[c] = float(j.size)
133 self.
prop[c] = self.
ni[c]/n
134 self.
mean[c,:] = sp.mean(x[j,:],axis=0)
# Biased (divide-by-N) covariance; rowvar=0 means rows are observations.
135 self.
cov[c,:,:] = sp.cov(x[j,:],bias=1,rowvar=0)
# Symmetric eigendecomposition of the class covariance.
138 L,Q = linalg.eigh(self.
cov[c,:,:])
# Sort eigenvalues in decreasing order and reorder eigenvectors to match;
# the matching assignment of self.L is not visible in this extract.
139 idx = L.argsort()[::-1]
141 self.
Q[c,:,:]=Q[:,idx]
# --- GMMR.predict (fragment) ---
# Computes, for each sample in xt, a quadratic decision value K[:,c] per
# class and picks the label of the class minimizing it (labels 1-based).
# invCov/logdet come from code outside this extract (see
# compute_inverse_logdet below).
145 Function that predict the label for sample xt using the learned model
147 xt: the samples to be classified
150 K: the decision value for each class
# Constant term of the decision value: logdet - 2*log(prop_c).
165 cst = logdet - 2*sp.log(self.
prop[c])
# Center the samples on the class mean.
167 xtc = xt-self.
mean[c,:]
# Mahalanobis-style quadratic form xtc^T invCov xtc per sample, plus cst.
168 temp = sp.dot(invCov,xtc.T).T
169 K[:,c] = sp.sum(xtc*temp,axis=1)+cst
# Smallest decision value wins; +1 maps 0-based argmin to labels 1..C.
173 yp = sp.argmin(K,1)+1
# --- compute_inverse_logdet (fragment) ---
# Rebuilds the inverse covariance of class c from its stored
# eigendecomposition: invCov = Q * diag(1/Lr) * Q^T, and the
# log-determinant as sum(log(Lr)). Lr is presumably the (ridge-
# regularized) eigenvalue vector; its definition is not visible in this
# extract -- TODO confirm.
181 temp = self.
Q[c,:,:]*(1/Lr)
182 invCov = sp.dot(temp,self.
Q[c,:,:].T)
183 logdet = sp.sum(sp.log(Lr))
# --- GMMR.BIC (fragment) ---
# Bayesian Information Criterion of the fitted model on (x, y); tau is the
# optional ridge parameter. Most of the body (likelihood accumulation and
# the final BIC value) is missing from this extract.
186 def BIC(self,x,y,tau=None):
188 Computes the Bayesian Information Criterion of the model
# C classes and d features, read back from the fitted means.
191 C,d = self.mean.shape
# Free parameters: per class d (mean) + d(d+1)/2 (covariance) = d(d+3)/2,
# plus C-1 independent class proportions.
201 P = C*(d*(d+3)/2) + (C-1)
# Samples of class c+1 (the class loop header is not visible here).
207 j = sp.where(y==(c+1))[0]
# Same quadratic decision value as in predict: the constant term ...
210 cst = logdet - 2*sp.log(self.
prop[c])
# ... plus the per-sample quadratic form on the centered data xi.
212 temp = sp.dot(invCov,xi.T).T
213 K = sp.sum(xi*temp,axis=1)+cst
# --- GMMR.cross_validation (fragment) ---
# v-fold stratified cross-validation over a range of ridge values tau.
# Returns the tau with the best score together with the whole score curve.
221 Function that computes the cross validation accuracy for the value tau of the regularization
223 x : the training samples
225 tau : a range of values to be tested
226 v : the number of fold
228 err : the estimated error with cross validation for all tau's value
# Build stratified folds from the labels.
234 cv.split_data_class(y)
# One GMMR model per fold, trained on that fold's training indices.
240 model_cv.append(
GMMR())
241 model_cv[i].
learn(x[cv.it[i],:], y[cv.it[i]])
# Score every fold asynchronously; each worker runs the module-level
# predict() helper on the fold's test set for all tau values.
245 processes = [pool.apply_async(predict,args=(tau,model_cv[i],x[cv.iT[i],:],y[cv.iT[i]]))
for i
in range(v)]
253 for model
in model_cv:
# Release the pool, the async results and the per-fold models.
256 del processes,pool,model_cv
# predict() stores accuracies (percent correct), hence argmax even though
# the variable is named `err`.
258 return tau[err.argmax()],err
# Extraction residue -- identifier/docstring fragments from the original file:
#   def compute_inverse_logdet
#   def predict -- "Temporary predict function."
#   ni -- "Get information from the data."