Skip to content

Commit d067eca

Browse files
committed
improvements to memory usage during multiple sequence alignment
git-svn-id: http://code.open-bio.org/repos/biojava/biojava-live/trunk@8156 7c6358e6-4a41-0410-a743-a5b2a554c398
1 parent 0169186 commit d067eca

File tree

8 files changed

+89
-63
lines changed

8 files changed

+89
-63
lines changed

biojava3-alignment/src/main/java/org/biojava3/alignment/FractionalIdentityScorer.java

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,13 @@
4242
public class FractionalIdentityScorer<S extends Sequence<C>, C extends Compound> extends AbstractScorer
4343
implements PairwiseSequenceScorer<S, C> {
4444

45-
// input field
46-
private SequencePair<S, C> pair;
45+
// always stored
46+
private S query, target;
47+
private int max, score;
4748

4849
// optional cached input field
4950
private PairwiseSequenceAligner<S, C> aligner;
5051

51-
// output fields
52-
private int max, score;
53-
5452
/**
5553
* Creates a fractional identity scorer for a pair of sequences aligned by the given pairwise sequence aligner.
5654
*
@@ -69,17 +67,23 @@ public FractionalIdentityScorer(SequencePair<S, C> pair) {
6967
set(pair);
7068
}
7169

70+
// methods for PairwiseSequenceScorer
71+
7272
@Override
73-
public SequencePair<S, C> getPair() {
74-
if (pair == null && aligner != null) {
75-
set(aligner.getPair());
76-
}
77-
return pair;
73+
public S getQuery() {
74+
return query;
7875
}
7976

77+
@Override
78+
public S getTarget() {
79+
return target;
80+
}
81+
82+
// methods for Scorer
83+
8084
@Override
8185
public int getMaxScore() {
82-
if (pair == null && aligner != null) {
86+
if ((query == null || target == null) && aligner != null) {
8387
set(aligner.getPair());
8488
}
8589
return max;
@@ -92,15 +96,16 @@ public int getMinScore() {
9296

9397
@Override
9498
public int getScore() {
95-
if (pair == null && aligner != null) {
99+
if ((query == null || target == null) && aligner != null) {
96100
set(aligner.getPair());
97101
}
98102
return score;
99103
}
100104

101105
// helper method for initialization
102106
private void set(SequencePair<S, C> pair) {
103-
this.pair = pair;
107+
query = pair.getQuery().getOriginalSequence();
108+
target = pair.getTarget().getOriginalSequence();
104109
max = pair.getLength();
105110
score = pair.getNumIdenticals();
106111
aligner = null;

biojava3-alignment/src/main/java/org/biojava3/alignment/FractionalSimilarityScorer.java

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,13 @@
4242
public class FractionalSimilarityScorer<S extends Sequence<C>, C extends Compound> extends AbstractScorer
4343
implements PairwiseSequenceScorer<S, C> {
4444

45-
// input field
46-
private SequencePair<S, C> pair;
45+
// always stored
46+
private S query, target;
47+
private int max, score;
4748

4849
// optional cached input field
4950
private PairwiseSequenceAligner<S, C> aligner;
5051

51-
// output fields
52-
private int max, score;
53-
5452
/**
5553
* Creates a fractional similarity scorer for a pair of sequences aligned by the given pairwise sequence aligner.
5654
*
@@ -69,17 +67,23 @@ public FractionalSimilarityScorer(SequencePair<S, C> pair) {
6967
set(pair);
7068
}
7169

70+
// methods for PairwiseSequenceScorer
71+
7272
@Override
73-
public SequencePair<S, C> getPair() {
74-
if (pair == null && aligner != null) {
75-
set(aligner.getPair());
76-
}
77-
return pair;
73+
public S getQuery() {
74+
return query;
7875
}
7976

77+
@Override
78+
public S getTarget() {
79+
return target;
80+
}
81+
82+
// methods for Scorer
83+
8084
@Override
8185
public int getMaxScore() {
82-
if (pair == null && aligner != null) {
86+
if ((query == null || target == null) && aligner != null) {
8387
set(aligner.getPair());
8488
}
8589
return max;
@@ -92,15 +96,16 @@ public int getMinScore() {
9296

9397
@Override
9498
public int getScore() {
95-
if (pair == null && aligner != null) {
99+
if ((query == null || target == null) && aligner != null) {
96100
set(aligner.getPair());
97101
}
98102
return score;
99103
}
100104

101105
// helper method for initialization
102106
private void set(SequencePair<S, C> pair) {
103-
this.pair = pair;
107+
query = pair.getQuery().getOriginalSequence();
108+
target = pair.getTarget().getOriginalSequence();
104109
max = pair.getLength();
105110
score = pair.getNumSimilars();
106111
aligner = null;

biojava3-alignment/src/main/java/org/biojava3/alignment/SimpleSequencePair.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,9 @@ public int getNumIdenticals() {
124124
identicals++;
125125
}
126126
}
127+
getQuery().clearCache();
128+
getTarget().clearCache();
127129
}
128-
getQuery().clearCache();
129-
getTarget().clearCache();
130130
return identicals;
131131
}
132132

@@ -139,9 +139,9 @@ public int getNumSimilars() {
139139
similars++;
140140
}
141141
}
142+
getQuery().clearCache();
143+
getTarget().clearCache();
142144
}
143-
getQuery().clearCache();
144-
getTarget().clearCache();
145145
return similars;
146146
}
147147

biojava3-alignment/src/main/java/org/biojava3/alignment/template/PairwiseSequenceAligner.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@
3636
public interface PairwiseSequenceAligner<S extends Sequence<C>, C extends Compound> extends Aligner<S, C>,
3737
PairwiseSequenceScorer<S, C> {
3838

39-
// combines 2 interfaces
39+
/**
40+
* Returns sequence alignment pair.
41+
*
42+
* @return sequence alignment pair
43+
*/
44+
SequencePair<S, C> getPair();
4045

4146
}

biojava3-alignment/src/main/java/org/biojava3/alignment/template/PairwiseSequenceScorer.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,26 @@
2727
import org.biojava3.core.sequence.template.Sequence;
2828

2929
/**
30-
* Defines an algorithm which computes a score for a sequence alignment pair.
30+
* Defines an algorithm which computes a score for a pair of sequences.
3131
*
3232
* @author Mark Chapman
3333
* @param <S> each {@link Sequence} of the alignment pair is of type S
34-
* @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
34+
* @param <C> each element of a Sequence is a {@link Compound} of type C
3535
*/
3636
public interface PairwiseSequenceScorer<S extends Sequence<C>, C extends Compound> extends Scorer {
3737

3838
/**
39-
* Returns pair for which score was calculated.
39+
* Returns the first sequence of the pair.
4040
*
41-
* @return sequence alignment pair
41+
* @return the first sequence of the pair
4242
*/
43-
SequencePair<S, C> getPair();
43+
S getQuery();
44+
45+
/**
46+
* Returns the second sequence of the pair.
47+
*
48+
* @return the second sequence of the pair
49+
*/
50+
S getTarget();
4451

4552
}

biojava3-alignment/src/test/java/org/biojava3/alignment/CookbookMSAProfiler.java

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
package org.biojava3.alignment;
22

33
import java.io.File;
4-
import java.io.FileNotFoundException;
54
import java.io.PrintStream;
65
import java.util.ArrayList;
76
import java.util.List;
87

98
import org.biojava3.alignment.Alignments.PairwiseScorer;
109
import org.biojava3.alignment.Alignments.ProfileAligner;
1110
import org.biojava3.alignment.template.GapPenalty;
12-
import org.biojava3.alignment.template.GuideTreeNode;
1311
import org.biojava3.alignment.template.PairwiseSequenceScorer;
1412
import org.biojava3.alignment.template.Profile;
1513
import org.biojava3.alignment.template.SubstitutionMatrix;
@@ -26,7 +24,7 @@ private static class Profiler {
2624
private final long timeStart;
2725

2826
private Profiler() {
29-
maxMemoryUsed = Math.max(maxMemoryUsed, Runtime.getRuntime().totalMemory());
27+
maxMemoryUsed = Runtime.getRuntime().totalMemory();
3028
timeStart = timeCheckpoint = System.nanoTime();
3129
}
3230

@@ -49,7 +47,7 @@ private void setCheckpoint() {
4947

5048
}
5149

52-
public static void main(String[] args) throws FileNotFoundException {
50+
public static void main(String[] args) throws Exception {
5351

5452
if (args.length < 1) {
5553
System.err.println("The first argument must be a fasta file of protein sequences.");
@@ -63,17 +61,15 @@ public static void main(String[] args) throws FileNotFoundException {
6361

6462
System.out.printf("Loading sequences from %s... ", args[0]);
6563
List<ProteinSequence> list = new ArrayList<ProteinSequence>();
66-
try {
67-
list.addAll(FastaReaderHelper.readFastaProteinSequence(new File(args[0])).values());
68-
} catch (Exception e) {
69-
e.printStackTrace();
70-
}
64+
list.addAll(FastaReaderHelper.readFastaProteinSequence(new File(args[0])).values());
7165
if (args.length > 1 && Integer.parseInt(args[1]) < list.size()) {
72-
System.out.printf("%s/%d sequences in %d ms%n%n", args[1], list.size(), profiler.getTimeSinceCheckpoint()/1000000);
66+
System.out.printf("%s/%d", args[1], list.size());
7367
list = list.subList(0, Integer.parseInt(args[1]));
7468
} else {
75-
System.out.printf("%d sequences in %d ms%n%n", list.size(), profiler.getTimeSinceCheckpoint()/1000000);
69+
System.out.printf("%d", list.size());
7670
}
71+
System.out.printf(" sequences in %d ms using %d kB%n%n", profiler.getTimeSinceCheckpoint()/1000000,
72+
profiler.getMaxMemoryUsed()/1024);
7773

7874
profiler.setCheckpoint();
7975

@@ -83,29 +79,25 @@ public static void main(String[] args) throws FileNotFoundException {
8379
List<PairwiseSequenceScorer<ProteinSequence, AminoAcidCompound>> scorers = Alignments.getAllPairsScorers(list,
8480
PairwiseScorer.GLOBAL_IDENTITIES, gaps, blosum62);
8581
Alignments.runPairwiseScorers(scorers);
86-
System.out.printf("%d scores in %d ms%n%n", scorers.size(), profiler.getTimeSinceCheckpoint()/1000000);
82+
System.out.printf("%d scores in %d ms using %d kB%n%n", scorers.size(),
83+
profiler.getTimeSinceCheckpoint()/1000000, profiler.getMaxMemoryUsed()/1024);
8784

8885
profiler.setCheckpoint();
8986

9087
System.out.print("Stage 2: hierarchical clustering into a guide tree... ");
9188
GuideTree<ProteinSequence, AminoAcidCompound> tree = new GuideTree<ProteinSequence, AminoAcidCompound>(list,
9289
scorers);
9390
scorers = null;
94-
System.out.printf("%d ms%n%n%s%n%n", profiler.getTimeSinceCheckpoint()/1000000, tree);
91+
System.out.printf("%d ms using %d kB%n%n%s%n%n", profiler.getTimeSinceCheckpoint()/1000000,
92+
profiler.getMaxMemoryUsed()/1024, tree);
9593

9694
profiler.setCheckpoint();
9795

9896
System.out.print("Stage 3: progressive alignment... ");
99-
int ppa = 0;
100-
for (GuideTreeNode<ProteinSequence, AminoAcidCompound> n : tree) {
101-
if (!n.isLeaf()) {
102-
ppa++;
103-
}
104-
}
10597
Profile<ProteinSequence, AminoAcidCompound> msa = Alignments.getProgressiveAlignment(tree,
10698
ProfileAligner.GLOBAL, gaps, blosum62);
107-
System.out.printf("%d profile-profile alignments in %d ms%n%n", ppa,
108-
profiler.getTimeSinceCheckpoint()/1000000);
99+
System.out.printf("%d profile-profile alignments in %d ms using %d kB%n%n", list.size() - 1,
100+
profiler.getTimeSinceCheckpoint()/1000000, profiler.getMaxMemoryUsed()/1024);
109101
fout.print(msa);
110102
fout.close();
111103

biojava3-alignment/src/test/java/org/biojava3/alignment/FractionalIdentityScorerTest.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,15 @@ public void testFractionalIdentityScorerSequencePairOfSC() {
6464
}
6565

6666
@Test
67-
public void testGetPair() {
68-
assertEquals(scorer1.getPair(), alignment.getPair());
69-
assertEquals(scorer2.getPair(), self.getPair());
67+
public void testGetQuery() {
68+
assertEquals(scorer1.getQuery(), query);
69+
assertEquals(scorer2.getQuery(), query);
70+
}
71+
72+
@Test
73+
public void testGetTarget() {
74+
assertEquals(scorer1.getQuery(), target);
75+
assertEquals(scorer2.getQuery(), query);
7076
}
7177

7278
@Test

biojava3-alignment/src/test/java/org/biojava3/alignment/FractionalSimilarityScorerTest.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,15 @@ public void testFractionalSimilarityScorerSequencePairOfSC() {
6565
}
6666

6767
@Test
68-
public void testGetPair() {
69-
assertEquals(scorer1.getPair(), alignment.getPair());
70-
assertEquals(scorer2.getPair(), self.getPair());
68+
public void testGetQuery() {
69+
assertEquals(scorer1.getQuery(), query);
70+
assertEquals(scorer2.getQuery(), query);
71+
}
72+
73+
@Test
74+
public void testGetTarget() {
75+
assertEquals(scorer1.getQuery(), target);
76+
assertEquals(scorer2.getQuery(), query);
7177
}
7278

7379
@Test

0 commit comments

Comments
 (0)