|
| 1 | +package demo; |
| 2 | + |
| 3 | +import java.io.IOException; |
| 4 | +import java.util.Arrays; |
| 5 | +import java.util.List; |
| 6 | +import java.util.ArrayList; |
| 7 | +import java.util.concurrent.ExecutionException; |
| 8 | + |
| 9 | +import org.biojava.nbio.structure.Atom; |
| 10 | +import org.biojava.nbio.structure.StructureException; |
| 11 | +import org.biojava.nbio.structure.align.ce.CeCPMain; |
| 12 | +import org.biojava.nbio.structure.align.multiple.MultipleAlignment; |
| 13 | +import org.biojava.nbio.structure.align.multiple.MultipleAlignmentWriter; |
| 14 | +import org.biojava.nbio.structure.align.multiple.mc.MultipleMcMain; |
| 15 | +import org.biojava.nbio.structure.align.multiple.mc.MultipleMcParameters; |
| 16 | +import org.biojava.nbio.structure.align.util.AtomCache; |
| 17 | + |
| 18 | +/** |
| 19 | + * Demo for running the MultipleMC Algorithm on a protein family. |
| 20 | + * For visualizing the results in jmol use the same Demo in the GUI module. |
| 21 | + * Here only the sequence alignment will be displayed. |
| 22 | + * Choose the family by commenting out the protein family names. |
| 23 | + * |
| 24 | + * @author Aleix Lafita |
| 25 | + * |
| 26 | + */ |
| 27 | +public class DemoMultipleMC { |
| 28 | + |
| 29 | + public static void main(String[] args) throws IOException, StructureException, InterruptedException, ExecutionException { |
| 30 | + |
| 31 | + //Specify the structures to align |
| 32 | + //ASP-proteinases (CEMC paper) |
| 33 | + //List<String> names = Arrays.asList("3app", "4ape", "2apr", "5pep", "1psn", "4cms", "1bbs.A", "1smr.A", "2jxr.A", "1mpp", "2asi", "1am5"); |
| 34 | + //Protein Kinases (CEMC paper) |
| 35 | + //List<String> names = Arrays.asList("1cdk.A", "1cja.A", "1csn", "1b6c.B", "1ir3.A", "1fgk.A", "1byg.A", "1hck", "1blx.A", "3erk", "1bmk.A", "1kob.A", "1tki.A", "1phk", "1a06"); |
| 36 | + //DHFR (Gerstein 1998 paper) |
| 37 | + List<String> names = Arrays.asList("d1dhfa_", "8dfr", "d4dfra_", "3dfr"); |
| 38 | + //TIM barrels (MUSTA paper) |
| 39 | + //List<String> names = Arrays.asList("1tim.A", "1vzw", "1nsj", "3tha.A", "4enl", "2mnr", "7tim.A", "1tml", "1btc", "a1piia1", "6xia", "5rub.A", "2taa.B"); |
| 40 | + //Calcium Binding (MUSTA paper) |
| 41 | + //List<String> names = Arrays.asList("4cpv", "2scp.A", "2sas", "1top", "1scm.B", "3icb"); |
| 42 | + //Serine Rich Proteins SERP (MUSTA paper) |
| 43 | + //List<String> names = Arrays.asList("7api.A", "8api.A", "1hle.A", "1ova.A", "2ach.A", "9api.A", "1psi", "1atu", "1kct", "1ath.A", "1att.A"); |
| 44 | + //Serine Proteases (MUSTA paper) |
| 45 | + //List<String> names = Arrays.asList("1cse.E", "1sbn.E", "1pek.E", "3prk", "3tec.E"); |
| 46 | + //GPCRs |
| 47 | + //List<String> names = Arrays.asList("2z73.A", "1u19.A", "4ug2.A", "4xt3", "4or2.A", "3odu.A"); |
| 48 | + //Immunoglobulins (MAMMOTH paper) |
| 49 | + //List<String> names = Arrays.asList("2hla.B", "3hla.B", "1cd8", "2rhe", "1tlk", "1ten", "1ttf"); |
| 50 | + //Globins (MAMMOTH, POSA, Gerstein&Levitt and MUSTA papers) |
| 51 | + //List<String> names = Arrays.asList("1mbc", "1hlb", "1thb.A", "1ith.A", "1idr.A", "1dlw", "1kr7.A", "1ew6.A", "1it2.A", "1eco", "3sdh.A", "1cg5.B", "1fhj.B", "1ird.A", "1mba", "2gdm", "1b0b", "1h97.A", "1ash.A", "1jl7.A"); |
| 52 | + //Rossman-Fold (POSA paper) |
| 53 | + //List<String> names = Arrays.asList("d1heta2", "d1ek6a_", "d1obfo1", "2cmd", "d1np3a2", "d1bgva1", "d1id1a_", "d1id1a_", "d1oi7a1"); |
| 54 | + //Circular Permutations (Bliven CECP paper) - dynamin GTP-ase with CP G-domain |
| 55 | + //List<String> names = Arrays.asList("d1u0la2", "d1jwyb_"); |
| 56 | + //Circular Permutations: SAND and MFPT domains |
| 57 | + //List<String> names = Arrays.asList("d2bjqa1", "d1h5pa_", "d1ufna_"); //"d1oqja" |
| 58 | + //Flexible domain family of proteins (FatCat paper?) |
| 59 | + |
| 60 | + //Ankyrin Repeats |
| 61 | + //List<String> names = Arrays.asList("d1n0ra_", "3ehq.A", "1awc.B"); //ankyrin |
| 62 | + |
| 63 | + //Load the CA atoms of the structures |
| 64 | + AtomCache cache = new AtomCache(); |
| 65 | + List<Atom[]> atomArrays = new ArrayList<Atom[]>(); |
| 66 | + for (String name:names) { |
| 67 | + atomArrays.add(cache.getAtoms(name)); |
| 68 | + } |
| 69 | + |
| 70 | + //Here the multiple structural alignment algorithm comes in place to generate the alignment object |
| 71 | + MultipleMcMain algorithm = new MultipleMcMain(); |
| 72 | + MultipleMcParameters params = (MultipleMcParameters) algorithm.getParameters(); |
| 73 | + params.setPairwiseAlgorithm(CeCPMain.algorithmName); |
| 74 | + |
| 75 | + MultipleAlignment result = algorithm.align(atomArrays); |
| 76 | + result.getEnsemble().setStructureNames(names); |
| 77 | + |
| 78 | + //Information about the alignment |
| 79 | + result.getEnsemble().setAlgorithmName(algorithm.getAlgorithmName()); |
| 80 | + result.getEnsemble().setVersion(algorithm.getVersion()); |
| 81 | + |
| 82 | + //Output the sequence alignment + transformations |
| 83 | + System.out.println(MultipleAlignmentWriter.toFatCat(result)); |
| 84 | + //System.out.println(MultipleAlignmentWriter.toFASTA(result)); |
| 85 | + System.out.println(MultipleAlignmentWriter.toTransformMatrices(result)); |
| 86 | + } |
| 87 | +} |
0 commit comments