-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
162 lines (138 loc) · 5.45 KB
/
main.py
File metadata and controls
162 lines (138 loc) · 5.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""
File for running our algorithms on the dataset
Authors: Kenny, Raymond, Real Rick
Date: 4/25/2019
"""
from preprocess import *
from sklearn import svm
from sklearn.metrics import confusion_matrix, accuracy_score
from SVM import *
from naiveBayes import *
from decisionTrees import *
import MFC as MFC
import optparse, sys
import matplotlib.pyplot as plt
def parse_opts(opts, parser):
    """
    Purpose - Parses command line arguments to decide whether or not to
              perform binary or multiclass classification
    Params  - opts: command line arguments (optparse Values object)
              parser: parser object, used to print usage on error
    Returns - boolean, True if user wants a binary classification task, False
              otherwise. Exits the program (status 1) if --task is missing or
              unrecognized.
    """
    # --task is mandatory, but optparse cannot enforce that itself, so we
    # check here. getattr with a None default mirrors optparse's behavior of
    # defaulting unsupplied options to None.
    task = getattr(opts, 'task', None)
    if not task:
        print('mandatory option task is missing!\n')
        parser.print_help()
        sys.exit(1)
    # now figure out which task to do; map recognized names onto the boolean
    # flag the caller expects
    task_flags = {'binary': True, 'multiclass': False}
    if task in task_flags:
        return task_flags[task]
    print("ERROR: Unrecognized task! Use options 'binary' or 'multiclass'\n")
    parser.print_help()
    sys.exit(1)
def main():
    """
    Driver for the classifier comparison experiment.

    Parses the command line, loads the adult-income dataset, then trains and
    evaluates four models — SVM, Naive Bayes, a decision tree, and a
    most-frequent-class (MFC) baseline — and plots a bar chart comparing
    their accuracy scores. Exits via parse_opts() if --task is missing or
    unrecognized.
    """
    parser = optparse.OptionParser(description='main.py')
    parser.add_option('-t', '--task',
                      help="use 'binary' for binary classification, 'multiclass' for multiclass")
    opts = parser.parse_args()[0]
    binary = parse_opts(opts, parser)

    # load the dataset; binary mode collapses labels to a two-class task
    data = Data("data/adult.data")
    if binary:
        data.readData(binary=True)
    else:
        data.readData()
    data.createSVMDataset()

    print("\n====================================================")
    print("SVM Revving Up...")
    print("====================================================")
    # creates multi-class SVM model
    svmClassifier = SVM()
    # train SVM model
    svmClassifier.trainSVM(data.SVMTrain, data.yTrain)
    # evaluate test data
    svmPredictions = svmClassifier.testSVM(data.SVMTest)
    # evaluate accuracy and print confusion matrix
    svm_score = svmClassifier.evaluate(data.SVMTest, data.yTest, svmPredictions)
    # feature analysis for binary classification task
    if binary:
        print("\nSee feature importance graph...")
        svmClassifier.visualizeWeights(data.SVMFeatures, data.SVMFeatureMeans)
    # uncomment this code to perform hyperparameter tuning for the SVC
    # classifier
    """
    print("\n====================================================")
    print("\t Starting hyper-parameter tuning...")
    print("====================================================")
    svc_params = {"C": np.logspace(-10, 10, 21)}
    svmClassifier.runTuneTest(svc_params, data.SVMTrain, data.yTrain)
    svmClassifier.printTestScores()
    print("SVM Training Complete!")
    print("====================================================")
    """

    # dTreeFeats is only consumed by the commented-out visualize() call below
    dTreeFeats = data.createNBDataset()
    # Naive Bayes
    print("\n====================================================")
    print("Starting Naive Bayes...")
    print("====================================================")
    print("Making naive assumptions...")
    print("----------------------------------------------------")
    # splits the train and test data into examples and labels
    nbTrainX = data.NBdataTrain
    nbTrainY = data.yTrain
    nbTestX = data.NBdataTest
    nbTestY = data.yTest
    # trains the naive bayes classifier
    naiveBayesClassifier = NaiveBayes()
    naiveBayesClassifier.trainNB(nbTrainX, nbTrainY)
    # test our model on the test data and get predictions
    nbPredictions = naiveBayesClassifier.testNB(nbTestX)
    # evaluate the accuracy of nb model
    nb_score = naiveBayesClassifier.evaluate(nbTestY, nbPredictions)

    # DecisionTree
    print("\n====================================================")
    print("Planting Decision Tree Seeds...")
    print("====================================================")
    print("Watering soil...")
    print("----------------------------------------------------")
    # creates decision tree
    decisionTreeClassifier = DecisionTree()
    # split the train and test data into examples and labels
    treeTrainX = data.DTreeDataTrain
    treeTrainY = data.yTrain
    treeTestX = data.DTreeDataTest
    treeTestY = data.yTest
    # train the decision tree model
    decisionTreeClassifier.trainTree(treeTrainX, treeTrainY)
    print("Tree is fully Grown!\n")
    # test our model on the test data and get predictions
    treePredictions = decisionTreeClassifier.testTree(treeTestX)
    # evaluate the accuracy
    dtree_score = decisionTreeClassifier.evaluate(treeTestX, treeTestY, treePredictions)
    # decisionTreeClassifier.visualize(dTreeFeats)

    # MFC: predict-the-most-frequent-class baseline for comparison
    print("\n====================================================")
    print("Most Frequent Class Baseline")
    print("====================================================")
    mfc_score = MFC.evaluate(nbTrainY, nbTestY)

    print("\nSee overall score graph...\n")
    # plot comparative bar graph of all four accuracy scores
    scores = [svm_score, nb_score, dtree_score, mfc_score]
    x_labels = ['SVM', 'Naive Bayes', 'DTree', 'MFC']
    y_pos = np.arange(4)
    plt.bar(y_pos, scores, align='center', width=0.5)
    plt.xticks(y_pos, x_labels)
    plt.ylabel("Accuracy")
    plt.title("Accuracy Scores for Different Classifiers")
    plt.show()
    print("====================================================")
    print("PROGRAM COMPLETE!")
    print("====================================================")
if __name__ == "__main__":
main()