Skip to content

Commit ad97431

Browse files
committed
Merge branch 'master' of github.com:tobybreckon/python-examples-ml
Conflicts: README.md
2 parents 48ad776 + 26e27d1 commit ad97431

File tree

7 files changed

+574
-11
lines changed

7 files changed

+574
-11
lines changed

ensemblelearning/boosttree.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def unroll_responses(number_of_class, responses):
4949
new_responses[resp_idx] = 1
5050
return new_responses
5151

52-
########### Load Training and Testing Data SetsTypeError: can only concatenate tuple (not "int") to tuple
52+
########### Load Training and Testing Data Sets
5353

5454
# load training data set
5555

@@ -127,7 +127,7 @@ def unroll_responses(number_of_class, responses):
127127

128128
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
129129
#
130-
# As currently boosted tree classifier in OpenCV can only be trained
130+
# As currently the boosted tree classifier in OpenCV can only be trained
131131
# for 2-class problems, we transform the training data set by
132132
# "unrolling" each training sample as many times as the number of
133133
# classes (10) that we have.

knn/knn.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,6 @@
6464

6565
knn.setAlgorithmType(cv2.ml.KNEAREST_BRUTE_FORCE);
6666

67-
# set default 3, can be changed at query time in predict() call
68-
69-
knn.setDefaultK(3);
70-
7167
# set up classification, turning off regression
7268

7369
knn.setIsClassifier(True);

neuralnetwork/nnetwork1.py

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
#####################################################################
2+
3+
# Example : Neural Network based learning
4+
# basic illustrative python script
5+
6+
# For use with test / training datasets : spambase.{train | test}
7+
8+
# Author : Toby Breckon, [email protected]
9+
10+
# Copyright (c) 2014 / 16 School of Engineering & Computing Sciences,
11+
# Durham University, UK
12+
# License : LGPL - http://www.gnu.org/licenses/lgpl.html
13+
14+
#####################################################################
15+
16+
import csv
17+
import cv2
18+
import numpy as np
19+
20+
########### Define classes
21+
22+
classes = {1 : 'spam', 0 : 'ham (not spam)'}
23+
24+
#####################################################################
25+
26+
########### construct output layer
27+
28+
# expand training responses defined as class labels {0,1...,N} to output layer
29+
# responses for the OpenCV MLP (Neural Network) implementation such that class
30+
# label c becomes {0,0,0, ... 1, ...0} where the c-th entry is the only non-zero
31+
# entry (equal to "value", conventionally = 1) in the N-length vector
32+
33+
# label : the class label to be transformed to a row vector {0,0,0, ... 1, ...0}
34+
# max_classes : number of classes (the length N of the output layer vector)
35+
# value : value used to label the class response in the output layer vector
36+
# is_sigmoid : {True | False} - return {-value,....value,....-value} instead for
37+
# optimal use with OpenCV sigmoid function
38+
39+
def class_label_to_nn_output(label, max_classes, is_sigmoid, value=1):
    """Expand a class label into an output-layer response vector.

    The returned vector has one entry per class; the entry at index
    ``label`` is set to ``value`` and every other entry is set to a
    background level.

    Args:
        label: class label; anything ``int()`` accepts (for example the
            string "1" as read from a CSV row). Must satisfy
            ``0 <= int(label) < max_classes``.
        max_classes: length of the returned vector (one entry per class).
        is_sigmoid: when true the background entries are ``-value``
            (i.e. {-value, ..., value, ..., -value}); otherwise they are 0.
        value: response written at the target index (defaults to 1).

    Returns:
        A one-dimensional float32 numpy array of length ``max_classes``.

    Raises:
        ValueError: propagated from ``int()`` if ``label`` is not numeric.
        IndexError: if the label lies outside ``[0, max_classes)``.
    """
    index = int(label)

    # A negative index would otherwise silently wrap around and mark a
    # class counted from the end of the vector, so reject it explicitly.
    if not 0 <= index < max_classes:
        raise IndexError(
            "class label " + str(index) + " is outside the range [0, "
            + str(max_classes) + ")"
        )

    background = -value if is_sigmoid else 0
    output = np.full(max_classes, background, dtype=np.float32)
    output[index] = value

    return output
48+
49+
#####################################################################
50+
51+
########### Load Training and Testing Data Sets
52+
53+
# load training data set
54+
55+
reader=csv.reader(open("spambase.train","rt", encoding='ascii'),delimiter=',')
56+
57+
58+
attribute_list = []
59+
label_list = []
60+
nn_outputs_list = []
61+
62+
#### N.B there is a change in the loader here (compared to other examples)
63+
64+
for row in reader:
65+
# attributes in columns 0-56, class label in last column,
66+
attribute_list.append(list(row[i] for i in (list(range(0,57)))))
67+
label_list.append(row[57])
68+
nn_outputs_list.append(class_label_to_nn_output(row[57], len(classes), True, 1))
69+
70+
training_attributes=np.array(attribute_list).astype(np.float32)
71+
training_class_labels=np.array(label_list).astype(np.float32)
72+
training_nn_outputs=np.array(nn_outputs_list).astype(np.float32)
73+
74+
# load testing data set
75+
76+
reader=csv.reader(open("spambase.test","rt", encoding='ascii'),delimiter=',')
77+
78+
attribute_list = []
79+
label_list = []
80+
nn_outputs_list = []
81+
82+
for row in reader:
83+
# attributes in columns 0-56, class label in last column,
84+
attribute_list.append(list(row[i] for i in (list(range(0,57)))))
85+
label_list.append(row[57])
86+
87+
testing_attributes=np.array(attribute_list).astype(np.float32)
88+
testing_class_labels=np.array(label_list).astype(np.float32)
89+
90+
############ Perform Training -- Neural Network
91+
92+
# create the network object
93+
94+
nnetwork = cv2.ml.ANN_MLP_create();
95+
96+
# define number of layers, sizes of layers and train neural network
97+
# neural networks only support numerical inputs (convert any categorical inputs)
98+
99+
# set the network to be 2 layer 57->10->2
100+
# - one input node per attribute in a sample
101+
# - 10 hidden nodes
102+
# - one output node per class
103+
# defined by the column vector layer_sizes
104+
105+
layer_sizes = np.int32([57, 10, len(classes)]); # format = [inputs, hidden layer n ..., output]
106+
nnetwork.setLayerSizes(layer_sizes);
107+
108+
# create the network using a sigmoid function with alpha and beta
109+
# parameters = 1 specified respectively (standard sigmoid)
110+
111+
nnetwork.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM, 1, 1);
112+
113+
# available activation functions = (cv2.ml.ANN_MLP_SIGMOID_SYM or cv2.ml.ANN_MLP_IDENTITY, cv2.ml.ANN_MLP_GAUSSIAN)
114+
115+
# specify stopping criteria and backpropagation for training
116+
117+
nnetwork.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP);
118+
nnetwork.setBackpropMomentumScale(0.1);
119+
nnetwork.setBackpropWeightScale(0.1);
120+
121+
nnetwork.setTermCriteria((cv2.TERM_CRITERIA_COUNT + cv2.TERM_CRITERIA_EPS, 1000, 0.001))
122+
123+
## N.B. The OpenCV neural network (MLP) implementation does not
124+
## support categorical variable output explicitly unlike the
125+
## other OpenCV ML classes.
126+
## Instead, following the traditional approach of neural networks,
127+
## the output class label is formed by a binary vector that
128+
## corresponds to the desired output layer result for a given class
129+
## e.g. {0, 0 ... 1, 0, 0} components (one element by class) where
130+
## an entry "1" in the i-th vector position corresponds to a class
131+
## label for class i
132+
## for optimal performance with the OpenCV interpretation of the sigmoid
133+
## we use {-1, -1 ... 1, -1, -1}
134+
135+
## prior to training we must construct these output layer responses
136+
## from our conventional class labels (carried out by class_label_to_nn_output())
137+
138+
# train the neural network (using training data)
139+
140+
nnetwork.train(training_attributes, cv2.ml.ROW_SAMPLE, training_nn_outputs);
141+
142+
############ Perform Testing -- Neural Network
143+
144+
tp = 0 # spam
145+
tn = 0 # ham
146+
fp = 0 # classed as spam, but is ham
147+
fn = 0 # classed as ham, but is spam
148+
149+
# for each testing example
150+
151+
for i in range(0, len(testing_attributes[:,0])) :
152+
153+
# perform neural network prediction (i.e. classification)
154+
155+
# (to get around some kind of OpenCV python interface bug, vertically stack the
156+
# example with a second row of zeros of the same size and type which is ignored).
157+
158+
sample = np.vstack((testing_attributes[i,:],
159+
np.zeros(len(testing_attributes[i,:])).astype(np.float32)));
160+
161+
retrval,output_layer_responses = nnetwork.predict(sample);
162+
163+
# the class label c (result) is the index of the most
164+
# +ve of the output layer responses (from the first of the two examples in the stack)
165+
166+
result = np.argmax(output_layer_responses[0]);
167+
168+
print("Test data example : " + str(i + 1) + " : result = " + str(classes[int(result)]))
169+
170+
# record results as tp/tn/fp/fn
171+
172+
if (result == testing_class_labels[i] == 1) : tp+=1
173+
elif (result == testing_class_labels[i] == 0) : tn+=1
174+
elif (result != testing_class_labels[i]) :
175+
if ((result == 1) and (testing_class_labels[i] == 0)) : fp+=1
176+
elif ((result == 0) and (testing_class_labels[i] == 1)) : fn+=1
177+
178+
# output summary statistics
179+
180+
total = tp + tn + fp + fn
181+
correct = tp + tn
182+
wrong = fp + fn
183+
184+
print()
185+
print("Testing Data Set Performance Summary")
186+
print("TP : " + str(round((tp / float(total)) * 100, 2)) + "%")
187+
print("TN : " + str(round((tn / float(total)) * 100, 2)) + "%")
188+
print("FP : " + str(round((fp / float(total)) * 100, 2)) + "%")
189+
print("FN : " + str(round((fn / float(total)) * 100, 2)) + "%")
190+
print("Total Correct : "+ str(round((correct / float(total)) * 100, 2)) + "%")
191+
print("Total Wrong : "+ str(round((wrong / float(total)) * 100, 2)) + "%")
192+
193+
#####################################################################

0 commit comments

Comments
 (0)