-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
Copy pathzero_count.py
66 lines (49 loc) · 2.16 KB
/
zero_count.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -*- coding: utf-8 -*-
"""This file is part of the TPOT library.
TPOT was primarily developed at the University of Pennsylvania by:
- Randal S. Olson ([email protected])
- Weixuan Fu ([email protected])
- Daniel Angell ([email protected])
- and many more generous open source contributors
TPOT is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
TPOT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with TPOT. If not, see <http://www.gnu.org/licenses/>.
"""
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_array
class ZeroCount(BaseEstimator, TransformerMixin):
    """Adds the count of zeros and count of non-zeros per sample as features.

    The transformer is stateless: ``fit`` learns nothing and ``transform``
    depends only on the data it is given.
    """

    def fit(self, X, y=None):
        """Dummy function to fit in with the sklearn API.

        Parameters
        ----------
        X: array-like
            Unused
        y: None
            Unused

        Returns
        -------
        self: ZeroCount
            The same, unmodified instance
        """
        return self

    def transform(self, X, y=None):
        """Transform data by adding two virtual features.

        Parameters
        ----------
        X: numpy ndarray, {n_samples, n_features}
            New data, where n_samples is the number of samples and n_features
            is the number of features. It is validated with sklearn's
            ``check_array`` before use.
        y: None
            Unused

        Returns
        -------
        X_transformed: numpy ndarray, shape (n_samples, n_features + 2)
            A new array in which column 0 is the number of zero entries of
            each sample, column 1 is the number of non-zero entries, and the
            remaining columns are the input features in their original order.
        """
        X = check_array(X)
        n_features = X.shape[1]

        # Per-row non-zero counts; the zero counts follow by subtraction
        # from the row length.
        non_zero_vector = np.count_nonzero(X, axis=1)
        non_zero = non_zero_vector.reshape(-1, 1)
        zero_col = (n_features - non_zero_vector).reshape(-1, 1)

        # np.hstack always allocates a fresh output array, so the input is
        # never modified and no defensive copy of X is needed. Stacking all
        # three pieces in one call also avoids an intermediate matrix.
        return np.hstack((zero_col, non_zero, X))