Skip to content

Commit 7a90d83

Browse files
committed
setup.py
1 parent dd84b17 commit 7a90d83

File tree

12 files changed

+283
-46
lines changed

12 files changed

+283
-46
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# __init__.py
2+
3+
4+
__version__ = "0.1"
5+
6+
7+
# from .normalizations import percentile_normalization
8+
# from .normalizations import total_count_normalization
9+
# from .normalizations import quartile_normalization
10+
# from .normalizations import tmm_normalization
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
import numpy as np
2+
from sys import stderr
3+
from scipy.stats import rankdata
4+
from utils import percentile
5+
6+
#===============================================================================
7+
8+
def total_count_normalization(matrix):
    """
    Total count normalization.

    Divides every entry of ``matrix`` by the sum of its column
    (the sum is taken along ``axis=0``).

    Parameters
    ----------
    matrix : array_like
        Matrix to normalize. Objects that already expose a ``sum`` method
        are used as they are; anything else (for example nested lists or
        tuples) is first converted with ``numpy.asarray``.

    Returns
    -------
    array_like
        Normalized matrix, i.e. ``matrix / matrix.sum(axis=0)``.

    Notes
    -----
    Columns whose sum is zero are not special-cased here; the result of the
    division for such columns is whatever the underlying array type yields.
    """
    # Plain sequences have no ``sum`` method, so without this conversion the
    # function raised AttributeError for inputs the docstring declares valid.
    data = matrix if hasattr(matrix, "sum") else np.asarray(matrix)
    return data / data.sum(axis=0)
23+
24+
#===============================================================================
25+
26+
def percentile_normalization(matrix, p):
    """
    Percentile normalization.

    Scales ``matrix`` by the value(s) returned from ``percentile(matrix, p)``.

    Parameters
    ----------
    matrix : array_like
        Matrix to normalize.
    p : float in range of [0,100]
        Percentile to compute, which must be between 0 and 100 inclusive.
        It is forwarded unchanged to ``percentile``.

    Returns
    -------
    array_like
        Normalized matrix: ``matrix`` divided by the computed percentile.
    """
    scale = percentile(matrix, p)
    normalized = matrix / scale
    return normalized
43+
44+
#===============================================================================
45+
46+
def quartile_normalization(matrix, q):
    """
    Quartile normalization.

    Translates the requested quartile into a percentile (25, 50 or 75) and
    delegates to ``percentile_normalization``.

    Parameters
    ----------
    matrix : array_like
        Matrix to normalize.
    q : string from {"lower", "median", "upper"} or quartile number (1, 2 or 3)
        The quartile to normalize by:
            "lower"  = 1 -> 25th percentile,
            "median" = 2 -> 50th percentile,
            "upper"  = 3 -> 75th percentile.

    Returns
    -------
    array_like
        Normalized matrix.

    Raises
    ------
    AssertionError
        If ``q`` is not one of the accepted quartile names or numbers.
    """
    quartile_to_percentile = {
        "lower": 25, "median": 50, "upper": 75,
        1: 25, 2: 50, 3: 75,
    }
    try:
        p = quartile_to_percentile[q]
    except (KeyError, TypeError):
        # Raised explicitly instead of via an ``assert`` statement so the
        # check is not stripped under ``python -O``; the exception type and
        # message stay the same for existing callers. TypeError covers
        # unhashable values of ``q`` (e.g. a list).
        raise AssertionError(
            'Unexpected quartile for normalization: "' + str(q) + '"'
        ) from None
    return percentile_normalization(matrix, p)
68+
69+
#===============================================================================
70+
71+
def tmm_normalization(matrix, index_ref=None, trim_fold_change=0.3, trim_abs_expr=0.05):
    """
    Trimmed mean of M-values (TMM) normalization.

    For every column a scaling factor is computed as ``2 ** x`` where ``x`` is
    the weighted mean of the log2 fold changes (M-values) against a reference
    column, taken over the rows that survive trimming. Each column of
    ``matrix`` is then divided by its factor.

    Parameters
    ----------
    matrix : array_like
        Matrix to normalize (rows are treated as genes, columns as samples).
    index_ref : int, column label or None, optional
        Reference column. ``None`` selects the column whose 75th percentile
        (as computed by ``percentile``) is closest to the mean of all columns'
        75th percentiles. An integer is used as a positional index. Any other
        value is looked up among ``matrix.columns`` when ``matrix`` has that
        attribute (e.g. a pandas DataFrame).
    trim_fold_change : float, optional
        Fraction of rows trimmed from each end of the M-value (log fold
        change) ranking.
    trim_abs_expr : float, optional
        Fraction of rows trimmed from each end of the A-value (absolute
        expression) ranking.

    Returns
    -------
    array_like
        Normalized matrix (``matrix`` divided column-wise by the TMM factors).

    Raises
    ------
    ValueError
        If ``index_ref`` is a label that is not present in ``matrix.columns``.

    Notes
    -----
    When the sum of weights for a column is (close to) zero or infinite, a
    message is printed to stderr and that column's factor is 1.
    """
    matrix_np = np.array(matrix)  # plain ndarray for the numeric work

    # Divisions by zero and log2(0) are expected for zero counts. Silence the
    # warnings only for the duration of this call instead of changing the
    # process-wide NumPy error state.
    with np.errstate(divide='ignore', invalid='ignore'):
        # Resolve the reference column to a positional index.
        if index_ref is None:
            # Computed only when needed: an explicit index_ref does not use it.
            f75 = percentile(matrix_np, 75)
            index_ref = np.argmin(abs(f75 - np.mean(f75)))
        elif hasattr(matrix, "columns") and not isinstance(index_ref, (int, np.integer)):
            # Label-based reference for DataFrame-like input.
            hits = np.where(np.asarray(matrix.columns) == index_ref)[0]
            if hits.size == 0:
                raise ValueError("Reference column not found: '{}'".format(index_ref))
            index_ref = hits[0]

        # Matrices A and M describing the expression levels of genes:
        # A is the mean log2 expression, M the log2 fold change vs. reference.
        matr_norm = matrix_np / np.sum(matrix_np, axis=0)
        ref_norm = matr_norm[:, index_ref].reshape(matr_norm.shape[0], 1)
        matr_a = np.log2(matr_norm * ref_norm) / 2
        matr_m = np.log2(matr_norm / ref_norm)

        ref_vec_full = matrix_np[:, index_ref]
        total_ref_vec = np.sum(ref_vec_full)

        def log2_tmm(index_vec):
            """Return log2 of the TMM factor for column ``index_vec``."""
            curr_vec = matrix_np[:, index_vec]
            ref_vec = ref_vec_full

            # total number of molecules in the current column
            total_curr_vec = np.sum(curr_vec)

            # select significant genes: drop infinite A/M values, then trim
            # both tails of the A and M rankings
            a_col = matr_a[:, index_vec]
            m_col = matr_m[:, index_vec]
            check_inf = (~np.isinf(a_col)) & (~np.isinf(m_col))

            ranks = rankdata(a_col[check_inf], method='ordinal')
            bool_a = (ranks > len(ranks) * trim_abs_expr) & (ranks < len(ranks) * (1 - trim_abs_expr))
            ranks = rankdata(m_col[check_inf], method='ordinal')
            bool_m = (ranks > len(ranks) * trim_fold_change) & (ranks < len(ranks) * (1 - trim_fold_change))

            curr_vec = curr_vec[check_inf]
            ref_vec = ref_vec[check_inf]
            bool_result = (curr_vec > 0) & (ref_vec > 0) & bool_a & bool_m

            # calculation of required values: weights and M-values of the
            # retained genes
            curr_kept = curr_vec[bool_result]
            ref_kept = ref_vec[bool_result]
            w_vec = 1 / ((total_curr_vec - curr_kept) / (total_curr_vec * curr_kept) +
                         (total_ref_vec - ref_kept) / (total_ref_vec * ref_kept))
            m_vec = np.log2(curr_kept / total_curr_vec) - np.log2(ref_kept / total_ref_vec)

            # calculation of log2(tmm_factor)
            w_sum = np.sum(w_vec)
            if np.isclose(w_sum, 0) or np.isinf(w_sum):
                print("Unexpected sum of weights for vector {}: '{}'".format(index_vec, w_sum), file=stderr)
                return 0

            return np.sum(w_vec * m_vec) / w_sum

        # calculation of tmm_factor for every column
        tmm_factor = 2 ** np.array([log2_tmm(i) for i in range(matrix_np.shape[1])])

    # normalization of the input data (keeps the caller's container type)
    return matrix / tmm_factor
144+
145+
#===============================================================================

build/lib/pygmnormalize/utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import numpy as np
2+
3+
def percentile(matrix, p):
    """
    Column-wise percentile over the rows that have at least one positive value.

    Rows in which no entry is greater than zero are discarded first; the
    ``p``-th percentile is then computed along ``axis=0`` of the remaining
    rows. Zeros inside a retained row still take part in the computation.

    Parameters
    ----------
    matrix : array_like
        Two-dimensional matrix to calculate the percentile for. Inputs
        without an ``ndim`` attribute (e.g. nested lists) are converted with
        ``numpy.asarray``.
    p : float in range of [0,100]
        Percentile to compute, must be between 0 and 100 inclusive.

    Returns
    -------
    numpy.ndarray
        The calculated percentile of each column.
    """
    # ``matrix > 0`` is not defined for plain Python sequences.
    if not hasattr(matrix, "ndim"):
        matrix = np.asarray(matrix)

    expressed_rows = np.any(matrix > 0, axis=1)
    return np.percentile(matrix[expressed_rows], p, axis=0)
Lines changed: 75 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,9 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 1,
12+
"execution_count": 10,
1313
"metadata": {},
14-
"outputs": [
15-
{
16-
"ename": "ModuleNotFoundError",
17-
"evalue": "No module named 'pygmnormalize'",
18-
"output_type": "error",
19-
"traceback": [
20-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
21-
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
22-
"\u001b[1;32m<ipython-input-1-3e2e612184d4>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mpygmnormalize\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mgmn\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
23-
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'pygmnormalize'"
24-
]
25-
}
26-
],
14+
"outputs": [],
2715
"source": [
2816
"import pandas as pd\n",
2917
"import pygmnormalize as gmn"
@@ -38,13 +26,11 @@
3826
},
3927
{
4028
"cell_type": "code",
41-
"execution_count": 9,
42-
"metadata": {
43-
"collapsed": true
44-
},
29+
"execution_count": 11,
30+
"metadata": {},
4531
"outputs": [],
4632
"source": [
47-
"matrix = pd.read_table('../test_data/numeric_dataset_1.txt', sep=' ', header=None)"
33+
"matrix = pd.read_table('test_data/numeric_dataset_1.txt', sep=' ', header=None)"
4834
]
4935
},
5036
{
@@ -56,7 +42,7 @@
5642
},
5743
{
5844
"cell_type": "code",
59-
"execution_count": 10,
45+
"execution_count": 12,
6046
"metadata": {},
6147
"outputs": [
6248
{
@@ -136,7 +122,7 @@
136122
"5 8 9 8"
137123
]
138124
},
139-
"execution_count": 10,
125+
"execution_count": 12,
140126
"metadata": {},
141127
"output_type": "execute_result"
142128
}
@@ -154,11 +140,21 @@
154140
},
155141
{
156142
"cell_type": "code",
157-
"execution_count": null,
158-
"metadata": {
159-
"collapsed": true
160-
},
161-
"outputs": [],
143+
"execution_count": 17,
144+
"metadata": {},
145+
"outputs": [
146+
{
147+
"ename": "AttributeError",
148+
"evalue": "module 'pygmnormalize' has no attribute 'total_count_normalization'",
149+
"output_type": "error",
150+
"traceback": [
151+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
152+
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
153+
"\u001b[1;32m<ipython-input-17-666094828939>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnormal_matrix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgmn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtotal_count_normalization\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmatrix\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mnormal_matrix\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
154+
"\u001b[1;31mAttributeError\u001b[0m: module 'pygmnormalize' has no attribute 'total_count_normalization'"
155+
]
156+
}
157+
],
162158
"source": [
163159
"normal_matrix = gmn.total_count_normalization(matrix)\n",
164160
"normal_matrix"
@@ -173,11 +169,21 @@
173169
},
174170
{
175171
"cell_type": "code",
176-
"execution_count": null,
177-
"metadata": {
178-
"collapsed": true
179-
},
180-
"outputs": [],
172+
"execution_count": 18,
173+
"metadata": {},
174+
"outputs": [
175+
{
176+
"ename": "AttributeError",
177+
"evalue": "module 'pygmnormalize' has no attribute 'percentile_normalization'",
178+
"output_type": "error",
179+
"traceback": [
180+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
181+
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
182+
"\u001b[1;32m<ipython-input-18-1ae77d23c843>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnormal_matrix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgmn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpercentile_normalization\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmatrix\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mnormal_matrix\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
183+
"\u001b[1;31mAttributeError\u001b[0m: module 'pygmnormalize' has no attribute 'percentile_normalization'"
184+
]
185+
}
186+
],
181187
"source": [
182188
"normal_matrix = gmn.percentile_normalization(matrix)\n",
183189
"normal_matrix"
@@ -192,11 +198,21 @@
192198
},
193199
{
194200
"cell_type": "code",
195-
"execution_count": null,
196-
"metadata": {
197-
"collapsed": true
198-
},
199-
"outputs": [],
201+
"execution_count": 19,
202+
"metadata": {},
203+
"outputs": [
204+
{
205+
"ename": "AttributeError",
206+
"evalue": "module 'pygmnormalize' has no attribute 'quartile_normalization'",
207+
"output_type": "error",
208+
"traceback": [
209+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
210+
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
211+
"\u001b[1;32m<ipython-input-19-8ff3b3ba0a41>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnormal_matrix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgmn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mquartile_normalization\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmatrix\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mnormal_matrix\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
212+
"\u001b[1;31mAttributeError\u001b[0m: module 'pygmnormalize' has no attribute 'quartile_normalization'"
213+
]
214+
}
215+
],
200216
"source": [
201217
"normal_matrix = gmn.quartile_normalization(matrix)\n",
202218
"normal_matrix"
@@ -209,17 +225,36 @@
209225
"After TMM normalization:"
210226
]
211227
},
228+
{
229+
"cell_type": "code",
230+
"execution_count": 20,
231+
"metadata": {},
232+
"outputs": [
233+
{
234+
"ename": "AttributeError",
235+
"evalue": "module 'pygmnormalize' has no attribute 'tmm_normalization'",
236+
"output_type": "error",
237+
"traceback": [
238+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
239+
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
240+
"\u001b[1;32m<ipython-input-20-a19a3282316d>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnormal_matrix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgmn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtmm_normalization\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmatrix\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mnormal_matrix\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
241+
"\u001b[1;31mAttributeError\u001b[0m: module 'pygmnormalize' has no attribute 'tmm_normalization'"
242+
]
243+
}
244+
],
245+
"source": [
246+
"normal_matrix = gmn.tmm_normalization(matrix)\n",
247+
"normal_matrix"
248+
]
249+
},
212250
{
213251
"cell_type": "code",
214252
"execution_count": null,
215253
"metadata": {
216254
"collapsed": true
217255
},
218256
"outputs": [],
219-
"source": [
220-
"normal_matrix = gmn.tmm_normalization(matrix)\n",
221-
"normal_matrix"
222-
]
257+
"source": []
223258
}
224259
],
225260
"metadata": {

dist/pygmnormalize-0.1-py3.6.egg

6.09 KB
Binary file not shown.

pygmnormalize.egg-info/PKG-INFO

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
Metadata-Version: 1.1
2+
Name: pygmnormalize
3+
Version: 0.1
4+
Summary: Package with methods for normalization matrices of genes expression.
5+
Home-page: https://github.com/ficusss/PyGMNormalize
6+
Author: Grigory Feoktistov
7+
Author-email: [email protected]
8+
License: MIT
9+
Description: UNKNOWN
10+
Platform: UNKNOWN
11+
Classifier: Development Status :: 3 - Alpha
12+
Classifier: Intended Audience :: Science/Research
13+
Classifier: License :: OSI Approved :: MIT License
14+
Classifier: Operating System :: OS Independent
15+
Classifier: Programming Language :: Python

pygmnormalize.egg-info/SOURCES.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
setup.py
2+
pygmnormalize/__init__.py
3+
pygmnormalize/normalizations.py
4+
pygmnormalize/utils.py
5+
pygmnormalize.egg-info/PKG-INFO
6+
pygmnormalize.egg-info/SOURCES.txt
7+
pygmnormalize.egg-info/dependency_links.txt
8+
pygmnormalize.egg-info/not-zip-safe
9+
pygmnormalize.egg-info/top_level.txt
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pygmnormalize

0 commit comments

Comments
 (0)