Skip to content

Commit ca01321

Browse files
authored
Merge pull request biojava#959 from richarda23/alignment-matrix-tests
Alignment matrix tests
2 parents 0d9650a + 91105d7 commit ca01321

11 files changed

Lines changed: 329 additions & 63 deletions

File tree

biojava-core/src/main/java/org/biojava/nbio/core/alignment/matrices/AAIndexFileParser.java

Lines changed: 2 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,11 @@ public class AAIndexFileParser {
5151
boolean inMatrix;
5252
boolean symmetricMatrix ;
5353

54-
5554
public AAIndexFileParser(){
5655
matrices = new HashMap<String, SubstitutionMatrix<AminoAcidCompound>>();
5756
}
5857

59-
/** parse an inputStream that points to an AAINDEX database file
58+
/** Parse an inputStream that points to an AAINDEX database file
6059
*
6160
* @param inputStream
6261
* @throws IOException
@@ -75,13 +74,12 @@ public void parse(InputStream inputStream) throws IOException {
7574
line = buf.readLine();
7675

7776
while ( line != null ) {
78-
7977
if ( line.startsWith("//")) {
8078
finalizeMatrix();
8179
inMatrix = false;
8280

8381
} else if ( line.startsWith("H ")){
84-
// a new matric!
82+
// a new matrix!
8583
newMatrix(line);
8684
} else if ( line.startsWith("D ")) {
8785
currentMatrix.setDescription(line.substring(2));
@@ -92,25 +90,18 @@ public void parse(InputStream inputStream) throws IOException {
9290
if ( inMatrix)
9391
processScores(line);
9492
}
95-
9693
line = buf.readLine();
9794
}
98-
9995
}
10096

101-
10297
// process a line such as > -0.3 1.6 0.7 0.8 -2.6 3.0<
10398
private void processScores(String line) {
104-
10599
String[] values = line.trim().split(" +");
106100

107101
// increment the current row we are talking about
108102
currentRowPos++;
109103

110-
111-
112104
for ( int i =0 ; i < values.length ; i++){
113-
114105
if ( values[i].endsWith(".")) {
115106
values[i] = values[i] + "0";
116107
}
@@ -119,48 +110,34 @@ private void processScores(String line) {
119110
if (values[i].equals("-")) {
120111
values[i] = "0";
121112
}
122-
123113
if ( scale == -1 ) {
124114
scale = determineScale(values[0]);
125115
}
126116

127-
128117
Float score = Float.parseFloat(values[i]);
129118
score = scale * score;
130119

131120
Short s = (short) Math.round(score);
132-
133121
matrix[currentRowPos][i] = s;
134122

135123
if ( values.length < cols.size() || ( symmetricMatrix)){
136124
//System.out.println(values.length + " " + cols.size() + " " + currentRowPos + " " + i + " " + line);
137-
138125
matrix[i][currentRowPos] = s;
139-
140126
symmetricMatrix = true;
141-
142127
}
143-
144128
if ( score > max)
145129
max = s;
146130
if ( score < min)
147131
min = s;
148-
149-
150132
}
151133
}
152134

153135
private int determineScale(String value) {
154-
155136
String[] spl = value.split("\\.");
156-
157137
if (spl.length <= 1)
158138
return 1;
159-
160139
String digits = spl[1];
161-
162140
return (int)Math.round(Math.pow(10, digits.length()));
163-
164141
}
165142

166143
// process a line of type >M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV<
@@ -180,27 +157,19 @@ private void initMatrix(String line) {
180157
rows = new ArrayList<AminoAcidCompound>();
181158
cols = new ArrayList<AminoAcidCompound>();
182159

183-
184160
//System.out.println(">" + currentRows+"<");
185161
AminoAcidCompoundSet compoundSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();
186162
for ( int i = 0 ; i < currentRows.length() ; i ++){
187163
char c = currentRows.charAt(i);
188164
AminoAcidCompound aa = compoundSet.getCompoundForString(String.valueOf(c));
189-
190165
rows.add(aa);
191166
}
192167

193168
for ( int i = 0 ; i < currentCols.length() ; i ++){
194169
char c = currentRows.charAt(i);
195170
AminoAcidCompound aa = compoundSet.getCompoundForString(String.valueOf(c));
196-
197171
cols.add(aa);
198172
}
199-
200-
201-
202-
203-
204173
currentMatrix.setScale(scale);
205174
}
206175

biojava-core/src/main/java/org/biojava/nbio/core/alignment/matrices/AAIndexProvider.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,11 @@
2626

2727
public interface AAIndexProvider {
2828

29-
public SubstitutionMatrix<AminoAcidCompound> getMatrix(String matrixName);
29+
/**
30+
* Gets a substitution matrix by its name. The matrices are defined in
31+
* {@code src/main/resources/matrices/AAINDEX.txt}.
32+
* @param matrixName
33+
* @return the {@code SubstitutionMatrix}, or {@code null} if no matrix with that name exists
34+
*/
35+
SubstitutionMatrix<AminoAcidCompound> getMatrix(String matrixName);
3036
}

biojava-core/src/main/java/org/biojava/nbio/core/alignment/matrices/AAindexFactory.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,34 @@
2020
*/
2121
package org.biojava.nbio.core.alignment.matrices;
2222

23-
/** Factory class to get Providers for substitution matrices the are provided by the AAINDEX database.
23+
/**
24+
* Factory class to get Providers for substitution matrices that are provided by
25+
* the AAINDEX database.
2426
*
2527
* @author Andreas Prlic
2628
*
2729
*/
2830
public class AAindexFactory {
2931

30-
3132
private static AAIndexProvider provider = null;
3233

34+
/**
35+
* Gets singleton instance of an {@code AAIndexProvider}, always non-null
36+
*
37+
* @return
38+
*/
3339
public static AAIndexProvider getAAIndexProvider() {
34-
if ( provider == null)
40+
if (provider == null)
3541
provider = new DefaultAAIndexProvider();
3642
return provider;
3743
}
3844

45+
/**
46+
* Sets the singleton provider.
47+
* @param provider
48+
*/
3949
public static void setAAIndexProvider(AAIndexProvider provider) {
4050
AAindexFactory.provider = provider;
4151
}
4252

43-
44-
45-
4653
}

biojava-core/src/main/java/org/biojava/nbio/core/alignment/matrices/DefaultAAIndexProvider.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,7 @@ public class DefaultAAIndexProvider implements AAIndexProvider {
4242

4343
public DefaultAAIndexProvider(){
4444

45-
4645
InputStream inStream = getInputStreamToAAindexFile();
47-
4846
AAIndexFileParser parser = new AAIndexFileParser();
4947

5048
try {
@@ -54,14 +52,11 @@ public DefaultAAIndexProvider(){
5452
}
5553

5654
matrices = parser.getMatrices();
57-
5855
}
5956

6057
@Override
6158
public SubstitutionMatrix<AminoAcidCompound> getMatrix(String matrixName) {
62-
6359
return matrices.get(matrixName);
64-
6560
}
6661

6762
public InputStream getInputStreamToAAindexFile(){

biojava-core/src/main/java/org/biojava/nbio/core/alignment/matrices/SubstitutionMatrixHelper.java

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
3030
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
3131

32+
import java.io.InputStream;
3233
import java.io.InputStreamReader;
3334
import java.io.Serializable;
3435
import java.util.HashMap;
@@ -68,7 +69,10 @@ public static SubstitutionMatrix<AminoAcidCompound> getMatrixFromAAINDEX(String
6869

6970
}
7071

71-
72+
/**
73+
* Gets identity matrix where matches score 1 and mismatches score -10000
74+
* @return
75+
*/
7276
public static SubstitutionMatrix<AminoAcidCompound> getIdentity() {
7377
return getAminoAcidMatrix("identity");
7478
}
@@ -231,8 +235,8 @@ public static SubstitutionMatrix<AminoAcidCompound> getPAM250() {
231235

232236
/**
233237
* Returns a substitution matrix for {@link AminoAcidCompound amino acids} given by the name {@code name}.
234-
* Searches first in the default AAINDEX file (see @link {@link #getMatrixFromAAINDEX(String)}), then in the classpath.
235-
* If the required matrix does not exist, null is returned.
238+
* Searches first in the default AAINDEX file (see {@link #getMatrixFromAAINDEX(String)}), then in the classpath
239+
* in src/main/resources/matrices.
236240
* Example names:
237241
* <ul>
238242
* <li>blosum62</li>
@@ -241,6 +245,7 @@ public static SubstitutionMatrix<AminoAcidCompound> getPAM250() {
241245
* <li>gonnet250</li>
242246
* </ul>
243247
* @param name Either a common name or an AAINDEX name
248+
* @return a {@code SubstitutionMatrix}, or {@code null} if no matrix is found
244249
*/
245250
public static SubstitutionMatrix<AminoAcidCompound> getAminoAcidSubstitutionMatrix(String name) {
246251
SubstitutionMatrix<AminoAcidCompound> matrix = getMatrixFromAAINDEX(name);
@@ -251,8 +256,12 @@ public static SubstitutionMatrix<AminoAcidCompound> getAminoAcidSubstitutionMatr
251256
// reads in an amino acid substitution matrix, if necessary
252257
private static SubstitutionMatrix<AminoAcidCompound> getAminoAcidMatrix(String file) {
253258
if (!aminoAcidMatrices.containsKey(file)) {
259+
InputStreamReader reader = getReader(file);
260+
if (reader == null) {
261+
return null;
262+
}
254263
aminoAcidMatrices.put(file, new SimpleSubstitutionMatrix<AminoAcidCompound>(
255-
AminoAcidCompoundSet.getAminoAcidCompoundSet(), getReader(file), file));
264+
AminoAcidCompoundSet.getAminoAcidCompoundSet(), reader , file));
256265
}
257266
return aminoAcidMatrices.get(file);
258267
}
@@ -269,8 +278,12 @@ private static SubstitutionMatrix<NucleotideCompound> getNucleotideMatrix(String
269278
// reads in a substitution matrix from a resource file
270279
private static InputStreamReader getReader(String file) {
271280
String resourcePathPrefix = "matrices/";
272-
return new InputStreamReader(SubstitutionMatrixHelper.class.getResourceAsStream(String.format("/%s.txt",
273-
resourcePathPrefix+file)));
281+
InputStream is = SubstitutionMatrixHelper.class.getResourceAsStream(String.format("/%s.txt",
282+
resourcePathPrefix+file));
283+
if (is == null) {
284+
return null;
285+
}
286+
return new InputStreamReader(is);
274287
}
275288

276289
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package org.biojava.nbio.core.alignment.matrices;
2+
3+
import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
4+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
5+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
6+
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
7+
import org.junit.jupiter.api.Test;
8+
9+
import static org.junit.jupiter.api.Assertions.*;
10+
11+
class AAindexFactoryTest {
12+
13+
14+
DefaultAAIndexProvider provider = new DefaultAAIndexProvider();
15+
16+
@Test
17+
void aaProviderIsSingleton(){
18+
AAIndexProvider provider = AAindexFactory.getAAIndexProvider();
19+
assertNotNull(provider);
20+
AAIndexProvider provider2 = AAindexFactory.getAAIndexProvider();
21+
assertTrue(provider == provider2);
22+
}
23+
24+
@Test
25+
void cannotSetProviderToNull(){
26+
AAindexFactory.setAAIndexProvider(null);
27+
assertNotNull(AAindexFactory.getAAIndexProvider());
28+
}
29+
30+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package org.biojava.nbio.core.alignment.matrices;
2+
3+
import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
4+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
5+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
6+
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
7+
import org.junit.jupiter.api.Test;
8+
9+
import static org.junit.jupiter.api.Assertions.*;
10+
11+
class DefaultAAIndexProviderTest {
12+
13+
private static final String BENS940102 = "BENS940102";
14+
15+
DefaultAAIndexProvider provider = new DefaultAAIndexProvider();
16+
@Test
17+
void newAAIndexProviderReturnsNullIfNotExists(){
18+
assertNull(provider.getMatrix("unknown"));
19+
}
20+
21+
@Test
22+
void aaIndexProviderGetByName(){
23+
SubstitutionMatrix<AminoAcidCompound> matrix = provider.getMatrix(BENS940102);
24+
assertNotNull(matrix);
25+
assertEquals(BENS940102, matrix.getName());
26+
}
27+
}

0 commit comments

Comments
 (0)