Skip to content

Commit 46ea99d

Browse files
committed
Merge pull request biojava#290 from lafita/multaln
Polishing demo & GUI code for multiple alignments
2 parents 15f3cd8 + 179be76 commit 46ea99d

File tree

13 files changed

+714
-261
lines changed

13 files changed

+714
-261
lines changed

biojava-structure-gui/src/main/java/demo/DemoCEMC.java renamed to biojava-structure-gui/src/main/java/demo/DemoMultipleMC.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@
88

99
import org.biojava.nbio.structure.Atom;
1010
import org.biojava.nbio.structure.StructureException;
11-
import org.biojava.nbio.structure.align.cemc.CeMcMain;
11+
import org.biojava.nbio.structure.align.ce.CeCPMain;
1212
import org.biojava.nbio.structure.align.gui.MultipleAlignmentDisplay;
1313
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
14+
import org.biojava.nbio.structure.align.multiple.mc.MultipleMcMain;
15+
import org.biojava.nbio.structure.align.multiple.mc.MultipleMcParameters;
1416
import org.biojava.nbio.structure.align.util.AtomCache;
1517

1618
/**
@@ -20,7 +22,7 @@
2022
* @author Aleix Lafita
2123
*
2224
*/
23-
public class DemoCEMC {
25+
public class DemoMultipleMC {
2426

2527
public static void main(String[] args) throws IOException, StructureException, InterruptedException, ExecutionException {
2628

@@ -30,11 +32,9 @@ public static void main(String[] args) throws IOException, StructureException, I
3032
//Protein Kinases (CEMC paper)
3133
//List<String> names = Arrays.asList("1cdk.A", "1cja.A", "1csn", "1b6c.B", "1ir3.A", "1fgk.A", "1byg.A", "1hck", "1blx.A", "3erk", "1bmk.A", "1kob.A", "1tki.A", "1phk", "1a06");
3234
//DHFR (Gerstein 1998 paper)
33-
//List<String> names = Arrays.asList("d1dhfa_", "8dfr", "d4dfra_", "3dfr");
35+
List<String> names = Arrays.asList("d1dhfa_", "8dfr", "d4dfra_", "3dfr");
3436
//TIM barrels (MUSTA paper)
3537
//List<String> names = Arrays.asList("1tim.A", "1vzw", "1nsj", "3tha.A", "4enl", "2mnr", "7tim.A", "1tml", "1btc", "a1piia1", "6xia", "5rub.A", "2taa.B");
36-
//Helix-bundle (MUSTA paper)
37-
List<String> names = Arrays.asList("1bbh.A", "1aep", "1bge.B", "256b.A", "2ccy.A", "2hmz.A", "3ink.C");
3838
//Calcium Binding (MUSTA paper)
3939
//List<String> names = Arrays.asList("4cpv", "2scp.A", "2sas", "1top", "1scm.B", "3icb");
4040
//Serine Rich Proteins SERP (MUSTA paper)
@@ -45,7 +45,7 @@ public static void main(String[] args) throws IOException, StructureException, I
4545
//List<String> names = Arrays.asList("2z73.A", "1u19.A", "4ug2.A", "4xt3", "4or2.A", "3odu.A");
4646
//Immunoglobulins (MAMMOTH paper)
4747
//List<String> names = Arrays.asList("2hla.B", "3hla.B", "1cd8", "2rhe", "1tlk", "1ten", "1ttf");
48-
//Globins (MAMMOTH and MUSTA papers)
48+
//Globins (MAMMOTH, POSA, Gerstein&Levitt and MUSTA papers)
4949
//List<String> names = Arrays.asList("1mbc", "1hlb", "1thb.A", "1ith.A", "1idr.A", "1dlw", "1kr7.A", "1ew6.A", "1it2.A", "1eco", "3sdh.A", "1cg5.B", "1fhj.B", "1ird.A", "1mba", "2gdm", "1b0b", "1h97.A", "1ash.A", "1jl7.A");
5050
//Rossmann fold (POSA paper)
5151
//List<String> names = Arrays.asList("d1heta2", "d1ek6a_", "d1obfo1", "2cmd", "d1np3a2", "d1bgva1", "d1id1a_", "d1id1a_", "d1oi7a1");
@@ -66,7 +66,10 @@ public static void main(String[] args) throws IOException, StructureException, I
6666
}
6767

6868
//Here the multiple structural alignment algorithm comes into play to generate the alignment object
69-
CeMcMain algorithm = new CeMcMain();
69+
MultipleMcMain algorithm = new MultipleMcMain();
70+
MultipleMcParameters params = (MultipleMcParameters) algorithm.getParameters();
71+
params.setPairwiseAlgorithm(CeCPMain.algorithmName);
72+
7073
MultipleAlignment result = algorithm.align(atomArrays);
7174
result.getEnsemble().setStructureNames(names);
7275

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package demo;
2+
3+
import java.io.IOException;
4+
import java.util.Arrays;
5+
import java.util.List;
6+
import java.util.ArrayList;
7+
import java.util.concurrent.ExecutionException;
8+
9+
import org.biojava.nbio.structure.Atom;
10+
import org.biojava.nbio.structure.StructureException;
11+
import org.biojava.nbio.structure.align.ce.CeCPMain;
12+
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
13+
import org.biojava.nbio.structure.align.multiple.MultipleAlignmentWriter;
14+
import org.biojava.nbio.structure.align.multiple.mc.MultipleMcMain;
15+
import org.biojava.nbio.structure.align.multiple.mc.MultipleMcParameters;
16+
import org.biojava.nbio.structure.align.util.AtomCache;
17+
18+
/**
19+
* Demo for running the MultipleMC Algorithm on a protein family.
20+
* For visualizing the results in jmol use the same Demo in the GUI module.
21+
* Here only the sequence alignment will be displayed.
22+
* Choose the family by uncommenting its list of structure names (and commenting out the others).
23+
*
24+
* @author Aleix Lafita
25+
*
26+
*/
27+
public class DemoMultipleMC {
28+
29+
public static void main(String[] args) throws IOException, StructureException, InterruptedException, ExecutionException {
30+
31+
//Specify the structures to align
32+
//ASP-proteinases (CEMC paper)
33+
//List<String> names = Arrays.asList("3app", "4ape", "2apr", "5pep", "1psn", "4cms", "1bbs.A", "1smr.A", "2jxr.A", "1mpp", "2asi", "1am5");
34+
//Protein Kinases (CEMC paper)
35+
//List<String> names = Arrays.asList("1cdk.A", "1cja.A", "1csn", "1b6c.B", "1ir3.A", "1fgk.A", "1byg.A", "1hck", "1blx.A", "3erk", "1bmk.A", "1kob.A", "1tki.A", "1phk", "1a06");
36+
//DHFR (Gerstein 1998 paper)
37+
List<String> names = Arrays.asList("d1dhfa_", "8dfr", "d4dfra_", "3dfr");
38+
//TIM barrels (MUSTA paper)
39+
//List<String> names = Arrays.asList("1tim.A", "1vzw", "1nsj", "3tha.A", "4enl", "2mnr", "7tim.A", "1tml", "1btc", "a1piia1", "6xia", "5rub.A", "2taa.B");
40+
//Calcium Binding (MUSTA paper)
41+
//List<String> names = Arrays.asList("4cpv", "2scp.A", "2sas", "1top", "1scm.B", "3icb");
42+
//Serine Rich Proteins SERP (MUSTA paper)
43+
//List<String> names = Arrays.asList("7api.A", "8api.A", "1hle.A", "1ova.A", "2ach.A", "9api.A", "1psi", "1atu", "1kct", "1ath.A", "1att.A");
44+
//Serine Proteases (MUSTA paper)
45+
//List<String> names = Arrays.asList("1cse.E", "1sbn.E", "1pek.E", "3prk", "3tec.E");
46+
//GPCRs
47+
//List<String> names = Arrays.asList("2z73.A", "1u19.A", "4ug2.A", "4xt3", "4or2.A", "3odu.A");
48+
//Immunoglobulins (MAMMOTH paper)
49+
//List<String> names = Arrays.asList("2hla.B", "3hla.B", "1cd8", "2rhe", "1tlk", "1ten", "1ttf");
50+
//Globins (MAMMOTH, POSA, Gerstein&Levitt and MUSTA papers)
51+
//List<String> names = Arrays.asList("1mbc", "1hlb", "1thb.A", "1ith.A", "1idr.A", "1dlw", "1kr7.A", "1ew6.A", "1it2.A", "1eco", "3sdh.A", "1cg5.B", "1fhj.B", "1ird.A", "1mba", "2gdm", "1b0b", "1h97.A", "1ash.A", "1jl7.A");
52+
//Rossmann fold (POSA paper)
53+
//List<String> names = Arrays.asList("d1heta2", "d1ek6a_", "d1obfo1", "2cmd", "d1np3a2", "d1bgva1", "d1id1a_", "d1id1a_", "d1oi7a1");
54+
//Circular Permutations (Bliven CECP paper) - dynamin GTP-ase with CP G-domain
55+
//List<String> names = Arrays.asList("d1u0la2", "d1jwyb_");
56+
//Circular Permutations: SAND and MFPT domains
57+
//List<String> names = Arrays.asList("d2bjqa1", "d1h5pa_", "d1ufna_"); //"d1oqja"
58+
//Flexible domain family of proteins (FatCat paper?)
59+
60+
//Ankyrin Repeats
61+
//List<String> names = Arrays.asList("d1n0ra_", "3ehq.A", "1awc.B"); //ankyrin
62+
63+
//Load the CA atoms of the structures
64+
AtomCache cache = new AtomCache();
65+
List<Atom[]> atomArrays = new ArrayList<Atom[]>();
66+
for (String name:names) {
67+
atomArrays.add(cache.getAtoms(name));
68+
}
69+
70+
//Here the multiple structural alignment algorithm comes into play to generate the alignment object
71+
MultipleMcMain algorithm = new MultipleMcMain();
72+
MultipleMcParameters params = (MultipleMcParameters) algorithm.getParameters();
73+
params.setPairwiseAlgorithm(CeCPMain.algorithmName);
74+
75+
MultipleAlignment result = algorithm.align(atomArrays);
76+
result.getEnsemble().setStructureNames(names);
77+
78+
//Information about the alignment
79+
result.getEnsemble().setAlgorithmName(algorithm.getAlgorithmName());
80+
result.getEnsemble().setVersion(algorithm.getVersion());
81+
82+
//Output the sequence alignment + transformations
83+
System.out.println(MultipleAlignmentWriter.toFatCat(result));
84+
//System.out.println(MultipleAlignmentWriter.toFASTA(result));
85+
System.out.println(MultipleAlignmentWriter.toTransformMatrices(result));
86+
}
87+
}

biojava-structure/src/main/java/org/biojava/nbio/structure/align/cemc/CeMcParameters.java

Lines changed: 0 additions & 34 deletions
This file was deleted.

biojava-structure/src/main/java/org/biojava/nbio/structure/align/multiple/BlockImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public class BlockImpl extends AbstractScoresCache implements Serializable, Bloc
1919
private int coreLength; //number of residues aligned without gaps (cache)
2020

2121
/**
22-
* Constructor.
22+
* Constructor. Also links the parent to this instance.
2323
* @param blockSet the parent BlockSet of the BlockImpl instance.
2424
* @return BlockImpl a BlockImpl instance linked to its parent BlockSet.
2525
*/
@@ -41,7 +41,7 @@ public BlockImpl(BlockSet blockSet) {
4141
public BlockImpl(BlockImpl b) {
4242

4343
this.parent = b.parent;
44-
this.coreLength = b.coreLength;
44+
this.coreLength = -1;
4545

4646
this.alignRes = null;
4747
if (b.alignRes!=null){

biojava-structure/src/main/java/org/biojava/nbio/structure/align/multiple/BlockSetImpl.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import javax.vecmath.Matrix4d;
88

99
/**
10-
* A general implementation of a BlockSet to store multiple alignments.
10+
* A general implementation of a BlockSet to store the flexible parts of a multiple alignment.
1111
*
1212
* @author Aleix Lafita
1313
*
@@ -25,7 +25,7 @@ public class BlockSetImpl extends AbstractScoresCache implements Serializable, B
2525
private int coreLength; //number of aligned positions without gaps (cache)
2626

2727
/**
28-
* Constructor.
28+
* Constructor. Also links the parent to this instance.
2929
* @param multipleAlignment the parent MultipleAlignment of the BlockSetImpl instance.
3030
* @return BlockSetImpl a BlockSetImpl instance linked to its parent MultipleAlignment.
3131
*/
@@ -49,10 +49,18 @@ public BlockSetImpl(MultipleAlignment multipleAlignment) {
4949
public BlockSetImpl(BlockSetImpl bs){
5050

5151
this.parent = bs.parent;
52-
this.length = bs.length;
53-
this.coreLength = bs.coreLength;
52+
this.length = -1;
53+
this.coreLength = -1;
5454

55-
this.pose = null; pose = null; //Because the pose is a cache variable it has to be updated/calculated again.
55+
this.pose = null;
56+
if (bs.pose != null){
57+
//Make a deep copy of everything
58+
this.pose = new ArrayList<Matrix4d>();
59+
for (Matrix4d trans:bs.pose){
60+
Matrix4d newTrans = (Matrix4d) trans.clone();
61+
pose.add(newTrans);
62+
}
63+
}
5664

5765
blocks = null;
5866
if (bs.blocks!=null){

biojava-structure/src/main/java/org/biojava/nbio/structure/align/multiple/MultipleAlignmentImpl.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,18 @@ public MultipleAlignmentImpl(MultipleAlignmentImpl ma) {
7575
super(ma);
7676

7777
parent = ma.parent;
78-
pose = null; //Because the pose is a cache variable it has to be updated/calculated again.
78+
pose = null;
79+
if (ma.pose != null){
80+
//Make a deep copy of everything
81+
this.pose = new ArrayList<Matrix4d>();
82+
for (Matrix4d trans:ma.pose){
83+
Matrix4d newTrans = (Matrix4d) trans.clone();
84+
pose.add(newTrans);
85+
}
86+
}
87+
88+
length = -1;
89+
coreLength = -1;
7990

8091
blockSets = null;
8192
if (ma.blockSets!=null){

0 commit comments

Comments
 (0)