-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy patharffreader.py
31 lines (26 loc) · 987 Bytes
/
arffreader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# A simple arff reader
#
# Author: Cassio M. M. Pereira
import numpy as np
import re
def readarff(fileName, supervision=True):
    """Read the data section of an ARFF file into NumPy arrays.

    The number of columns is guessed by counting the ``@attribute``
    declaration lines of the file. The data rows themselves are parsed
    by ``customload``.

    Parameters
    ----------
    fileName : str
        Path of the ARFF file to read.
    supervision : bool, optional
        If true (the default), treat the last column as the supervision
        (class label) and return it separately from the other columns.

    Returns
    -------
    tuple or numpy.ndarray
        When ``supervision`` is true, a tuple ``(X, labels)``: ``X`` holds
        every column but the last, loaded as doubles, and ``labels`` is an
        integer array. Numeric labels are cast to ``int``; labels that
        cannot be parsed as numbers are replaced by the index of each
        label among the sorted unique labels, as a column of shape
        ``(n, 1)``. When ``supervision`` is false, all columns are
        returned as a single array of doubles.
    """
    with open(fileName, 'r') as handle:
        header_text = handle.read()

    # Try to guess the number of dimensions. ARFF keywords are
    # case-insensitive, and anchoring at the start of a line avoids counting
    # an "@attribute" that merely appears inside a comment.
    declarations = re.findall(r"^\s*@attribute\b", header_text,
                              flags=re.IGNORECASE | re.MULTILINE)
    ndim = len(declarations)

    if not supervision:
        return customload(fileName, usecols=None)

    X = customload(fileName, usecols=np.arange(ndim - 1))
    label_col = [ndim - 1]
    try:
        # The builtin int replaces the np.int alias removed in NumPy 1.24.
        labels = customload(fileName, usecols=label_col).astype(int)
    except ValueError:
        # The last column is not numeric: load it as text and encode every
        # distinct label by its position among the sorted unique labels.
        raw = np.atleast_1d(customload(fileName, usecols=label_col, dtype=str))
        _, codes = np.unique(raw, return_inverse=True)
        # Keep the historical (n, 1) shape of the encoded labels.
        labels = codes.reshape(-1, 1).astype(int)
    return (X, labels)
def customload(fileName, usecols, dtype=np.double):
    """Load the comma-separated data rows of an ARFF file.

    Thin wrapper around ``np.loadtxt``. Everything from an ``@`` onwards is
    discarded, which skips the ARFF header declarations, and so is
    everything from a ``%`` onwards, which skips ARFF comments.

    Parameters
    ----------
    fileName : str
        Path of the file to read.
    usecols : int, sequence of int, or None
        Columns to read, passed through to ``np.loadtxt``; ``None`` reads
        every column.
    dtype : data-type, optional
        Type of the resulting array (``np.double`` by default).

    Returns
    -------
    numpy.ndarray
        The parsed data, as returned by ``np.loadtxt``.
    """
    # '%' starts a comment in ARFF; without it any commented file fails to
    # parse because the comment text is read as a data row.
    return np.loadtxt(fileName, dtype=dtype, comments=['@', '%'],
                      delimiter=',', usecols=usecols)