binarySVM.py
from __future__ import print_function
from __future__ import division
import numpy as np
import skimage
from skimage import io,data
import glob
#import openface
#import cv2
#import dlib
#import csv
import scipy.io
from sklearn import svm, datasets
import sklearn.model_selection
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib as plt  # accessed as plt.pyplot.* below
import matplotlib.pyplot  # make sure the pyplot submodule is actually loaded
from matplotlib import cm
from sklearn.externals import joblib  # on scikit-learn >= 0.23 use "import joblib" instead
from scipy.cluster.vq import whiten,kmeans
def LoadData(basepath, typeOfFeat='dlib', person='gianni', typeofdata='testing'):
    '''This function loads the corresponding npy file with the features.
    The output is a table of features of a particular ID, of a particular
    set, obtained with a particular net.'''
    alldata = np.load(basepath + typeofdata + "_" + person + "_feat.npy")
    if typeOfFeat == 'dlib':
        return alldata[0]
    else:
        return alldata[1]
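# Example usage (illustrative; assumes the .npy files follow the <set>_<person>_feat.npy
# naming used below, with the dlib table at index 0 and the openface table at index 1):
#   feats = LoadData("/home/francesco/perLuca/sameImgs/", typeOfFeat='openface',
#                    person='gianni', typeofdata='training')
#   # "feats" is then a (#images, 128) array of descriptors for that person and set.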
def calcSelected(predProb, ths, othersId):
    '''This is the probability-based classifier. If every output is lower than a threshold,
    the classifier chooses the <<others>> pseudo-class. Arguments are:
    predProb = prediction probabilities, a matrix with per-class probabilities (on columns) for every image
    ths = threshold used for the decision
    othersId = index of the "others" class
    The output is an array of length = predProb.shape[0] with a decision for every feature.'''
    selectedClass = []
    for i in range(predProb.shape[0]):       # loop over every image
        maxId = np.argmax(predProb[i])       # find the index of the class with the highest probability
        if predProb[i][maxId] > ths:         # if the probability value is above the threshold
            selectedClass.append(maxId)      # choose that class
        else:
            selectedClass.append(othersId)   # otherwise it is "others"
    selectedClass = np.asarray(selectedClass)
    return selectedClass
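# Worked example (hypothetical numbers): with predProb = [[0.9, 0.1], [0.55, 0.45]],
# ths = 0.6 and othersId = 2, the first row is confidently class 0 (0.9 > 0.6) while
# the second row's best probability (0.55) falls below the threshold, so the result
# is [0, 2]. In the script below the "others" index passed in is 0, i.e. the
# negative/others label of the binary problem.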
def createBinaryDataSet(trainWho, negIdxTab, origSet, NSample):
    """From a multiclass set, this function creates a binary dataset.
    It returns a table of features and the corresponding labels.
    Arguments:
    - trainWho: index of the class to be used as positive (['gianni','stefano','sergio','jhilick','others'])
    - negIdxTab: indices of the classes to be used as negatives
    - origSet: origin dataset (list of per-class feature tables)
    - NSample: samples per binary class"""
    # positives
    smallValidFeat = np.zeros((2*NSample, 128))
    smallValidLabels = np.zeros(2*NSample)
    PosValidSetTemp = origSet[trainWho].copy()
    np.random.shuffle(PosValidSetTemp)
    smallValidFeat[0:NSample] = PosValidSetTemp[0:NSample]
    smallValidLabels[0:NSample] = 1
    # negatives: split the NSample negative slots evenly among the negative classes
    indexNow = NSample
    negLen = len(negIdxTab)
    NegNSample = int(np.floor(NSample/negLen))
    for negIdx in range(negLen):
        NegSetTemp = origSet[int(negIdxTab[negIdx])].copy()
        np.random.shuffle(NegSetTemp)
        smallValidFeat[indexNow:indexNow+NegNSample] = NegSetTemp[0:NegNSample]
        smallValidLabels[indexNow:indexNow+NegNSample] = 0
        indexNow += NegNSample
    if indexNow < 2*NSample:
        # rounding left some slots empty: fill them with extra samples from the last negative set
        diff = 2*NSample - indexNow
        smallValidFeat[indexNow:indexNow+diff] = NegSetTemp[NegNSample:NegNSample+diff]
        smallValidLabels[indexNow:indexNow+diff] = 0
    return smallValidFeat, smallValidLabels
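# Example of the resulting layout (illustrative): with NSample = 100 the returned
# arrays have 200 rows; rows 0..99 are shuffled samples of the positive class
# (label 1) and rows 100..199 are samples drawn evenly from the classes listed in
# negIdxTab (label 0).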
def CreatecolorTuple(f):
    '''This function maps a value f in [0,1] to an (r,g,b) colour tuple,
    following a jet-like colormap (f=0 -> blue, f=1 -> red).'''
    a = (1-f)/0.25           # invert and group
    X = np.floor(a)          # this is the integer part
    Y = np.floor(255*(a-X))  # fractional part from 0 to 255
    if X == 0:
        r = 255
        g = Y
        b = 0
    elif X == 1:
        r = 255-Y
        g = 255
        b = 0
    elif X == 2:
        r = 0
        g = 255
        b = Y
    elif X == 3:
        r = 0
        g = 255-Y
        b = 255
    elif X == 4:
        r = 0
        g = 0
        b = 255
    r /= 255.0
    g /= 255.0
    b /= 255.0
    return (r, g, b)
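# Example (illustrative): CreatecolorTuple(0.0) gives (0.0, 0.0, 1.0) (blue) and
# CreatecolorTuple(1.0) gives (1.0, 0.0, 0.0) (red). The main loop below uses
# matplotlib's cm.jet for the same purpose, so this helper is only referenced
# from the commented-out colorTuple block.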
######################## START
FeatTYPE = 'openface'  # 'openface' or 'dlib'
typeOfFeat = FeatTYPE
basepath = "/home/francesco/perLuca/sameImgs/"  # folder with the shared features
basepathsavefig = '/home/francesco/test_svm/'
peopleName = ['gianni','stefano','sergio','jhilick','others']
folderName = ['testing','training','verification']
# file naming: <set>_<person>_feat.npy, e.g. testing_gianni_feat.npy
NsamplePerBClass = [25,50,100,200,400,600,800,1000,1100]  # how many samples per binary class
NTEST = 20        # how many classifiers to train for each sample number
TIMESVALID = 20   # how many validation runs for choosing a threshold
REPEATTEST = 20   # how many test runs on one trained classifier
# class to be trained
#trainWho = 3
avgAcc = np.zeros((len(peopleName)-1,len(NsamplePerBClass)))
avgVarAcc = np.zeros((len(peopleName)-1,len(NsamplePerBClass)))
#figallacc = plt.pyplot.figure(figsize=(16,10),dpi=400)
for trainWho in range(len(peopleName)-1):
    # we can use the JET colormap
    #colorTuple = []
    #for i in range(len(NsamplePerBClass)):
    #    colorTuple.append(CreatecolorTuple(i/len(NsamplePerBClass)))
    #colorTuple = np.asarray(colorTuple)
    #print(colorTuple)
    # validation: 150 id, 150 others
    NSampleForValid = 150
    # test: 80 img ID, 80 img others
    NSampleForTest = 80
    testFeat = []
    trainFeat = []
    validFeat = []
    for name in peopleName:
        testFeat.append(LoadData(typeOfFeat=typeOfFeat,basepath=basepath,typeofdata='testing',person=name))
        trainFeat.append(LoadData(typeOfFeat=typeOfFeat,basepath=basepath,typeofdata='training',person=name))
        validFeat.append(LoadData(typeOfFeat=typeOfFeat,basepath=basepath,typeofdata='verification',person=name))
    print("name,#testing,#training,#verification")
    for nameidx in range(len(peopleName)):
        print(peopleName[nameidx],testFeat[nameidx].shape[0],trainFeat[nameidx].shape[0],validFeat[nameidx].shape[0])
    print('dataset loaded')
    realNumberOfPeople = len(peopleName)-1
    fig = plt.pyplot.figure(figsize=(16,10),dpi=400)
    best_th = 0
    p = np.arange(0,1,0.01)
    testRes = np.zeros(len(NsamplePerBClass))
    varianceForClassTest = []
    # samples per class loop
    for Nsidx in range(len(NsamplePerBClass)):
        print("Use ",NsamplePerBClass[Nsidx])
        nsampleNow = NsamplePerBClass[Nsidx]
        smallTrainFeat = np.zeros((2*nsampleNow,128))  # how many samples to load
        smallTrainLabel = np.zeros(2*nsampleNow)
        print(smallTrainFeat.shape)
        testcurve = np.zeros((NTEST,p.shape[0]))  # accuracy curve per test
        for testN in range(NTEST):  # tests per classifier
            negIdxTab = np.ones(len(peopleName))
            negIdxTab[trainWho] = 0
            negIdxTab[realNumberOfPeople] = 0  # others are at the end
            idxr = np.arange(negIdxTab.shape[0])
            negIdxTab = (negIdxTab*idxr)[negIdxTab == 1]
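            # Example (illustrative): with peopleName of length 5 and trainWho = 0,
            # negIdxTab starts as [0,1,1,1,0] and ends up as [1., 2., 3.], i.e. the
            # indices of the other identities, with the positive class and "others"
            # excluded from the training negatives.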
            smallTrainFeat,smallTrainLabel = createBinaryDataSet(trainWho,negIdxTab,trainFeat,nsampleNow)
            #print('Trainset done')
            ######## PUT HERE YOUR CLASSIFIER TRAINING
            model = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                            decision_function_shape='ovr', degree=3, gamma='auto',
                            kernel='linear',  # kernel='rbf' # overfits
                            max_iter=-1, probability=True, random_state=None,
                            shrinking=True, tol=0.001, verbose=True)
            #print('Fitting model')
            x_train = smallTrainFeat
            y_train = smallTrainLabel
            model.fit(x_train, y_train)
            score = model.score(x_train, y_train)
            #print("Model accuracy",score)
            #joblib.dump(model, "svm_dlib.pkl")  # save SVM
            #print('Start validation')
            veriaccuracy = np.zeros((TIMESVALID,p.shape[0]))
            for k in range(TIMESVALID):  # repeat the validation many times
                smallValidFeat, smallValidLabels = createBinaryDataSet(trainWho,[realNumberOfPeople],validFeat,NSampleForValid)
                # validation set complete
                x_veri = smallValidFeat
                y_veri = smallValidLabels
                predicted_proba = model.predict_proba(x_veri)
                boolmat = np.zeros((p.size,y_veri.shape[0]))
                eqlabel = np.zeros_like(p)
                correctpred = np.zeros_like(p)
                for j in range(0,p.size):
                    boolmat[j] = calcSelected(predicted_proba,p[j],0)
                    eqlabel[j] = np.sum(np.equal(y_veri,boolmat[j]))
                    correctpred[j] = eqlabel[j]/y_veri.size
                veriaccuracy[k] = correctpred.copy()  # accuracy curve for this validation
            # CHOOSE HYPERPARAMETER
            meanveriFun = np.mean(veriaccuracy,axis=0)
            best_th = np.argmax(meanveriFun)  # index of the threshold with the best mean validation accuracy
            choosedTh = p[best_th]
            print("Chosen threshold:",choosedTh)
            # plot validation
            #print("Threshold chosen at:", choosedTh)
            #plt.pyplot.figure()
            #plt.pyplot.xlabel("Threshold")
            #plt.pyplot.ylabel("accuracy")
            #plt.pyplot.title(FeatTYPE + " - Acc = Combined accuracy(threshold)")
            #plt.pyplot.plot(p,meanveriFun)
            #plt.pyplot.grid(True)
            #plt.pyplot.show()
            # TESTING
            print('Testing')
            alltest = np.zeros((REPEATTEST,p.shape[0]))
            for jj in range(REPEATTEST):
                x_test,y_test = createBinaryDataSet(trainWho,[realNumberOfPeople],testFeat,NSampleForTest)
                # RUN YOUR CLASSIFIER HERE
                # the script expects a vector of real numbers, one for each class
                predicted_proba = model.predict_proba(x_test)
                boolmat = np.zeros((p.size,y_test.shape[0]))
                eqlabel = np.zeros_like(p)
                correctpred = np.zeros_like(p)
                for j in range(0,p.size):
                    boolmat[j] = calcSelected(predicted_proba,p[j],0)
                    eqlabel[j] = np.sum(np.equal(y_test,boolmat[j]))
                    correctpred[j] = eqlabel[j]/y_test.size
                alltest[jj] = correctpred.copy()
            testcurve[testN] = np.mean(alltest,axis=0).copy()  # average of the tests over many samples for one classifier
        meanTestFun = np.mean(testcurve,axis=0)  # average on many classifiers
        testRes[Nsidx] = meanTestFun[best_th]  # select accuracy with precalculated threshold
        # plot statistics for a sample number
        plt.pyplot.plot(p,meanTestFun,label=str(nsampleNow),color=cm.jet(Nsidx/len(NsamplePerBClass)))
        plt.pyplot.scatter(choosedTh,testRes[Nsidx],color=cm.jet(Nsidx/len(NsamplePerBClass)))
        plt.pyplot.text(choosedTh,testRes[Nsidx],str("%.2f"%testRes[Nsidx]),color=cm.jet(Nsidx/len(NsamplePerBClass)))
        varNow = np.std(testcurve,axis=0)[best_th]
        varianceForClassTest.append(varNow)
        plt.pyplot.errorbar(p[best_th],testRes[Nsidx],yerr=varNow,fmt='o',color=cm.jet(Nsidx/len(NsamplePerBClass)))
        print(testRes)
    # end of sample number loop for one class
    plt.pyplot.title("Test accuracy(threshold) - " + FeatTYPE + " - class: " + str(trainWho) + "_" + peopleName[trainWho],fontsize=30)
    plt.pyplot.xlabel("Threshold",fontsize=25)
    plt.pyplot.ylabel("Accuracy",fontsize=25)
    axes = fig.gca()
    axes.set_ylim([0.8,1])
    plt.pyplot.grid(True)
    #plt.pyplot.axvline(choosedTh,linewidth=3)
    plt.pyplot.legend()
    plt.pyplot.show()
    fig.savefig(basepathsavefig + FeatTYPE + "_" + str(trainWho) + "_" + peopleName[trainWho] + "accuracy_p_" + '.png', bbox_inches='tight',dpi=300)
    fig = plt.pyplot.figure(figsize=(16,10),dpi=400)
    plt.pyplot.plot(np.asarray(NsamplePerBClass),testRes)
    plt.pyplot.errorbar(np.asarray(NsamplePerBClass),testRes,yerr=np.asarray(varianceForClassTest),fmt='o')
    #plt.pyplot.text(np.asarray(NsamplePerBClass),testRes,str("%.2f"%testRes[Nsidx]))
    plt.pyplot.title('Accuracy(Samples per class) -' + str(trainWho) + ' ' + peopleName[trainWho] + ' ' + FeatTYPE,fontsize=30)
    plt.pyplot.xlabel('Samples per binary class during training',fontsize=25)
    plt.pyplot.ylabel("Accuracy",fontsize=25)
    axes = fig.gca()
    axes.set_ylim([0.8,1])
    plt.pyplot.grid(True)
    plt.pyplot.show()
    fig.savefig(basepathsavefig + FeatTYPE + "_" + str(trainWho) + "_" + peopleName[trainWho] + " accuracy_p_N" + '.png', bbox_inches='tight',dpi=300)
    #np.save(basepathsavefig + FeatTYPE + "_" + str(trainWho) + "_" + peopleName[trainWho] + "accN",testRes)
    #np.save(basepathsavefig + FeatTYPE + "_" + str(trainWho) + "_" + peopleName[trainWho] + "acc_VarN",np.asarray(varianceForClassTest))
    avgAcc[trainWho] = testRes.copy()
    avgVarAcc[trainWho] = np.asarray(varianceForClassTest)
# end of ID loop
avgAccfinal = np.mean(avgAcc,axis=0)
avgVarAccfinal = np.mean(avgVarAcc,axis=0)
print(avgAcc)
print(avgVarAccfinal)
fig = plt.pyplot.figure(figsize=(16,10),dpi=400)
plt.pyplot.plot(np.asarray(NsamplePerBClass),avgAccfinal)
plt.pyplot.errorbar(np.asarray(NsamplePerBClass),avgAccfinal,yerr=avgVarAccfinal,fmt='o')
for i in range(len(avgAccfinal)):
    plt.pyplot.text(np.asarray(NsamplePerBClass)[i],avgAccfinal[i],str("%.3f"%avgAccfinal[i]),color='k',fontsize=13)
    plt.pyplot.text(np.asarray(NsamplePerBClass)[i],
                    avgAccfinal[i]+avgVarAccfinal[i],
                    str("%.4f"%(avgVarAccfinal[i])),color='k',fontsize=13)
plt.pyplot.title('Average Accuracy(Samples per class) - ' + FeatTYPE,fontsize=25)
plt.pyplot.xlabel('Samples per binary class during training',fontsize=20)
plt.pyplot.ylabel("Accuracy",fontsize=20)
#axes = fig.gca()
#axes.set_ylim([0.8,1])
plt.pyplot.grid(True)
plt.pyplot.show()
fig.savefig(basepathsavefig +FeatTYPE+ "_"+ " average accuracy_p_N" + '.png', bbox_inches='tight',dpi=300)
fig = plt.pyplot.figure(figsize=(16,10),dpi=400)
plt.pyplot.plot(np.asarray(NsamplePerBClass),avgVarAccfinal/avgAccfinal)
plt.pyplot.title('Average var/acc ' + FeatTYPE,fontsize=25)
plt.pyplot.xlabel('Samples per binary class during training',fontsize=20)
plt.pyplot.ylabel("var/acc",fontsize=20)
#axes = fig.gca()
#axes.set_ylim([0.8,1])
plt.pyplot.grid(True)
plt.pyplot.show()
fig.savefig(basepathsavefig +FeatTYPE+ "_"+ " average varRatio_p_N" + '.png', bbox_inches='tight',dpi=300)
print('All done')