11package org .biojava .nbio .structure .align .cemc ;
22
33import java .util .ArrayList ;
4+ import java .util .Arrays ;
45import java .util .List ;
56import java .util .concurrent .Callable ;
67import java .util .concurrent .ExecutionException ;
1213import org .biojava .nbio .structure .StructureException ;
1314import org .biojava .nbio .structure .align .CallableStructureAlignment ;
1415import org .biojava .nbio .structure .align .MultipleStructureAligner ;
15- import org .biojava .nbio .structure .align .ce .CeMain ;
16+ import org .biojava .nbio .structure .align .ce .CeCPMain ;
1617import org .biojava .nbio .structure .align .ce .ConfigStrucAligParams ;
1718import org .biojava .nbio .structure .align .model .AFPChain ;
1819import org .biojava .nbio .structure .align .multiple .Block ;
2829import org .biojava .nbio .structure .align .multiple .ReferenceSuperimposer ;
2930
3031/**
31- * The main class of the Java implementation of the Combinatorial Extension - Monte Carlo Algorithm (CEMC),
32- * as has been originally developed by C.Guda, E.D.Scheeff, P.E. Bourne and I.N. Shindyalov (2001).
32+ * The main class of the Java implementation of the Combinatorial Extension - Monte Carlo (CEMC) Algorithm,
33+ * as it was originally described by C.Guda, E.D.Scheeff, P.E. Bourne and I.N. Shindyalov (2001).
34+ * <p>
3335 * The original CEMC paper is available from <a href="http://psb.stanford.edu/psb-online/proceedings/psb01/guda.pdf">here</a>.
34- *
35- * There is still no demo on how to use this algorithm.
36+ * <p>
37+ * The usage follows the {@link MultipleStructureAligner} interface.
38+ * A Demo on how to use the algorithm can be found in {@link DemoCEMC}.
3639 *
3740 * @author Aleix Lafita
3841 *
3942 */
40- public class CeMcMain implements MultipleStructureAligner {
43+ public class CeMcMain implements MultipleStructureAligner {
4144
4245 /**
43- * version history:
44- * 1.0 - Initial code implementation from CEMC article.
46+ * Version history:<p>
47+ * 1.0 - Initial code implementation from CEMC article without partial gaps.<p>
48+ * 2.0 - Update to support CP and partial gaps.<p>
4549 */
46- public static final String version = "1 .0" ;
50+ public static final String version = "2 .0" ;
4751 public static final String algorithmName = "jCEMC" ;
52+
4853 private CeMcParameters params ;
4954 private MultipleAlignmentEnsemble ensemble ;
55+ int reference = 0 ;
5056
5157 /**
52- * Default constructor. Instantiates an empty CeMcMain object with the default parameters.
58+ * Default constructor.
59+ * Default parameters are used.
5360 */
5461 public CeMcMain (){
5562 ensemble = null ;
5663 params = new CeMcParameters ();
5764 }
5865
5966 /**
60- * Creates the seed of the multiple structure alignment, before optimization, from all-to-all pairwise alignments
61- * of the structures. The alignments are generated in parallel using the Java API for concurrency management.
62- * The closest structure to all others is chosen as the reference and all the alignments to it are taken to generate
63- * an ungapped seed MultipleAlignment .
64- *
65- * This method is static because can be used outside this alignment class.
67+ * Creates a MultipleAlignment seed for MC optimization from the representative Atoms
68+ * of the structures. If there are N structures:
69+ * <ul><li>Generate (N*(N-1))/2 all-to- all alignments in parallel using the Java API.
70+ * <li>Choose the closest structure to all others as the reference .
71+ * <li>Generate a MultipleAlignment by combining all the alignments to the reference.
72+ * </ul>
6673 *
6774 * @param atomArrays List of Atoms to align of the structures
6875 * @return MultipleAlignment seed alignment
6976 * @throws ExecutionException
7077 * @throws InterruptedException
7178 * @throws StructureException
7279 */
73- public MultipleAlignment generateSeed (List <Atom []> atomArrays ) throws InterruptedException , ExecutionException , StructureException {
80+ private MultipleAlignment generateSeed (List <Atom []> atomArrays ) throws InterruptedException , ExecutionException , StructureException {
7481
7582 int size = atomArrays .size ();
7683
77- //List to store the all-to-all alignments. Contains the n^2 pairwise alignments as a 2D matrix indices.
84+ //List to store the all-to-all alignments. Contains the n^2 pairwise alignments as a 2D List
7885 List <List <AFPChain >> afpAlignments = new ArrayList <List <AFPChain >>();
7986 for (int i =0 ; i <size ; i ++){
8087 afpAlignments .add (new ArrayList <AFPChain >());
@@ -86,16 +93,17 @@ public MultipleAlignment generateSeed(List<Atom[]> atomArrays) throws Interrupte
8693 ExecutorService executor = Executors .newCachedThreadPool ();
8794 List <Future <AFPChain >> afpFuture = new ArrayList <Future <AFPChain >>();
8895
89- //Create all the possible protein pairwise combinations and call the alignment
96+ //Create all the possible protein pairwise combinations (N*(N-1)/2) and call the CeCP pairwise alignment algorithm
9097 for (int i =0 ; i <size ; i ++){
9198 for (int j =i +1 ; j <size ; j ++){
9299
93- Callable <AFPChain > worker = new CallableStructureAlignment (atomArrays .get (i ), atomArrays .get (j ), CeMain .algorithmName );
100+ Callable <AFPChain > worker = new CallableStructureAlignment (atomArrays .get (i ), atomArrays .get (j ), CeCPMain .algorithmName );
94101 Future <AFPChain > submit = executor .submit (worker );
95102 afpFuture .add (submit );
96103 }
97104 }
98105
106+ //Store the resulting AFPChains in the 2D List
99107 int index = 0 ; //the alignment index
100108 for (int i =0 ; i <size ; i ++){
101109 for (int j =i ; j <size ; j ++){
@@ -108,107 +116,124 @@ public MultipleAlignment generateSeed(List<Atom[]> atomArrays) throws Interrupte
108116 }
109117 executor .shutdown (); //Finish the executor because all the pairwise alignments have been calculated.
110118
111- //Choose the reference structure from the lowest average RMSD
119+ reference = chooseReferenceRMSD (afpAlignments );
120+ return combineReferenceAlignments (afpAlignments .get (reference ), atomArrays , reference );
121+ }
122+
123+ /**
124+ * This method takes the all-to-all pairwise alignments Matrix (as a double List of AFPChain) and
125+ * calculates the structure with the lowest average RMSD against all others. The index of this structure
126+ * is returned.
127+ *
128+ * @param alignments List double containing all-to-all pairwise alignments
129+ * @return int reference index
130+ */
131+ private static int chooseReferenceRMSD (List <List <AFPChain >> afpAlignments ){
132+
133+ int size = afpAlignments .size ();
134+
112135 List <Double > RMSDs = new ArrayList <Double >();
113- for (int i =0 ; i <size ; i ++){
136+ for (int i =0 ; i <afpAlignments . size () ; i ++){
114137 double rmsd =0.0 ;
115138 for (int j =0 ; j <size ; j ++){
116139 if (i !=j ) rmsd += afpAlignments .get (i ).get (j ).getTotalRmsdOpt ();
117140 }
118141 RMSDs .add (rmsd );
119142 }
120- int ref = 0 ;
143+ int reference = 0 ;
121144 for (int i =1 ; i <size ; i ++){
122- if (RMSDs .get (i ) < RMSDs .get (ref )) ref = i ;
145+ if (RMSDs .get (i ) < RMSDs .get (reference )) reference = i ;
123146 }
124-
125- return seedFromReference (afpAlignments .get (ref ), atomArrays , ref );
147+ return reference ;
126148 }
127149
128150 /**
129- * This method takes a list of pairwise alignments to the reference structure and calculates the
130- * MultipleAlignment resulting from them. It ignores blocks in AFPChain (flexible parts) and builds
131- * the Blocks as the definition of MultipleAlignment dictates {@link Block}. Gaps are not included.
151+ * This method takes a list of pairwise alignments to the reference structure and calculates a
152+ * MultipleAlignment resulting from combining their residue equivalencies.
153+ * <p>
154+ * It uses the blocks in AFPChain as {@link Block}s in the MultipleAlignment, so only CP are considered,
155+ * and flexible parts are ignored.
132156 *
133157 * @param afpList the list of pairwise alignments to the reference
134158 * @param atomArrays List of Atoms of the structures
135159 * @param ref index of the reference structure
136160 * @return MultipleAlignment seed alignment
137161 * @throws StructureException
138162 */
139- private MultipleAlignment seedFromReference (List <AFPChain > afpList , List <Atom []> atomArrays , int ref ) throws StructureException {
163+ private static MultipleAlignment combineReferenceAlignments (List <AFPChain > afpList , List <Atom []> atomArrays , int ref ) throws StructureException {
140164
141165 int size = atomArrays .size (); //the number of structures
142166 int length = 0 ; //the number of residues of the reference structure
143- if (ref ==0 ) length = afpList .get (1 ).getCa1Length ();
167+ if (ref ==0 ) length = afpList .get (1 ).getCa1Length (); //because the 0-0 alignment is null
144168 else length = afpList .get (0 ).getCa2Length ();
145169
146- //Stores the alignment equivalencies of all the structures as a double List (first index reference residue, second structure nr) - note this is the inverse of AlignRes in Block
170+ //Stores the alignment equivalencies of all the structures as a double List: equivalencies[str][res]
147171 List <List <Integer >> equivalencies = new ArrayList <List <Integer >>();
148- for (int i =0 ; i < length ; i ++){
172+ for (int str =0 ; str < size ; str ++){
149173 equivalencies .add (new ArrayList <Integer >());
150- for (int j =0 ; j < size ; j ++){
151- if (j ==ref ) equivalencies .get (i ).add (i );
152- else equivalencies .get (i ).add (null );
174+ for (int res =0 ; res < length ; res ++){
175+ if (str ==ref ) equivalencies .get (str ).add (res ); //identity
176+ else equivalencies .get (str ).add (null );
153177 }
154178 }
155179
156180 //Now we parse the AFPChains adding the residue equivalencies
157- for (int j =0 ; j <size ; j ++){
158- if (j ==ref ) continue ; //avoid the self-comparison because it is null
159- for (int bk =0 ; bk <afpList .get (j ).getBlockNum (); bk ++){
160- for (int i =0 ; i <afpList .get (j ).getOptLen ()[bk ]; i ++){
161- int res1 = 0 ;
181+ for (int str =0 ; str <size ; str ++){
182+ if (str ==ref ) continue ; //avoid the self-comparison because it is null
183+ for (int bk =0 ; bk <afpList .get (str ).getBlockNum (); bk ++){
184+ for (int i =0 ; i <afpList .get (str ).getOptLen ()[bk ]; i ++){
185+ int res1 = 0 ; //reference index
162186 int res2 = 0 ;
163- //The low index is always in the first chain and the higher in the second
164- if (j >ref ){
165- res1 = afpList .get (j ).getOptAln ()[bk ][0 ][i ];
166- res2 = afpList .get (j ).getOptAln ()[bk ][1 ][i ];
187+ //The low index is always in the first chain (0) and the higher in the second (1)
188+ if (str >ref ){
189+ res1 = afpList .get (str ).getOptAln ()[bk ][0 ][i ];
190+ res2 = afpList .get (str ).getOptAln ()[bk ][1 ][i ];
167191 }
168- else if (j <ref ){
169- res1 = afpList .get (j ).getOptAln ()[bk ][1 ][i ];
170- res2 = afpList .get (j ).getOptAln ()[bk ][0 ][i ];
192+ else if (str <ref ){
193+ res1 = afpList .get (str ).getOptAln ()[bk ][1 ][i ];
194+ res2 = afpList .get (str ).getOptAln ()[bk ][0 ][i ];
171195 }
172- equivalencies .get (res1 ).set (j ,res2 );
196+ equivalencies .get (str ).set (res1 ,res2 );
173197 }
174198 }
175199 }
176200
177- //Now that we have the equivalencies we create the MultipleAlignment
178- MultipleAlignment seed = new MultipleAlignmentImpl (ensemble );
201+ //Now that we have translated the equivalencies we create the MultipleAlignment
202+ MultipleAlignment seed = new MultipleAlignmentImpl ();
203+ seed .getEnsemble ().setAtomArrays (atomArrays );
179204 BlockSet blockSet = new BlockSetImpl (seed );
180- new BlockImpl (blockSet ); //This automatically adds a Block to the Block list in BlockSet
205+ new BlockImpl (blockSet );
181206
182- //We loop through all the equivalencies checking for gapped columns and CP
183- //Case CP: we start a new block
184- //Case gap: we do not include the equivalencies, the seed cannot contain gaps
185- for (int i =0 ; i <length ; i ++){
186- boolean gap = false ;
207+ //Store last positions in the block different than null to detect CP
208+ int [] lastResidues = new int [size ];
209+ Arrays .fill (lastResidues , -1 );
210+
211+ //We loop through all the equivalencies checking for CP: start new Block if CP
212+ for (int pos =0 ; pos <length ; pos ++){
187213 boolean cp = false ;
188- for (int j =0 ; j <size ; j ++){
189- if (equivalencies .get (i ).get (j ) == null ){ //there is a gap, do not consider the
190- gap = true ;
214+ for (int str =0 ; str <size ; str ++){
215+ if (equivalencies .get (str ).get (pos ) == null ) continue ; //there is a gap, ignore position
216+ else if (equivalencies .get (str ).get (pos ) < lastResidues [str ]){
217+ cp = true ; //Because the current residue is lower than the last added for at least one structure
191218 break ;
192- }
193- if (blockSet .getBlocks ().get (blockSet .getBlocks ().size ()-1 ).length () > 0 ){
194- if (equivalencies .get (i ).get (j ) < blockSet .getBlocks ().get (blockSet .getBlocks ().size ()-1 )
195- .getAlignRes ().get (j ).get ( blockSet .getBlocks ().get (blockSet .getBlocks ().size ()-1 ).length ()-1 )){
196- cp = true ; //Because the current residue is lower than the last added for at least one structure
197- }
198- }
219+ } else lastResidues [str ] = equivalencies .get (str ).get (pos );
199220 }
200- if (gap ) continue ;
201- if (cp ) //if there is a CP create a new Block
221+ if (cp ){ //if there is a CP create a new Block, initialize AlignRes and clear lastResidues
202222 new BlockImpl (blockSet );
223+ Arrays .fill (lastResidues ,-1 );
224+ }
203225
204226 //Now add the equivalent residues into the Block AlignRes variable
205- for (int j =0 ; j <size ; j ++){
206- if (blockSet .getBlocks ().get (blockSet .getBlocks ().size ()-1 ).getAlignRes ().size () == 0 ) //If it is empty initialize a list for every structure
207- for (int k =0 ; k <size ; k ++) blockSet .getBlocks ().get (blockSet .getBlocks ().size ()-1 ).getAlignRes ().add (new ArrayList <Integer >());
208- blockSet .getBlocks ().get (blockSet .getBlocks ().size ()-1 ).getAlignRes ().get (j ).add (equivalencies .get (i ).get (j ));
227+ for (int str =0 ; str <size ; str ++){
228+ if (blockSet .getBlocks ().get (blockSet .getBlocks ().size ()-1 ).getAlignRes () == null ){ //If it is empty initialize it
229+ List <List <Integer >> alnRes = new ArrayList <List <Integer >>(size );
230+ for (int k =0 ; k <size ; k ++) alnRes .add (new ArrayList <Integer >());
231+ blockSet .getBlocks ().get (blockSet .getBlocks ().size ()-1 ).setAlignRes (alnRes );
232+ }
233+ blockSet .getBlocks ().get (blockSet .getBlocks ().size ()-1 ).getAlignRes ().get (str ).add (equivalencies .get (str ).get (pos ));
209234 }
210235 }
211- MultipleSuperimposer imposer = new ReferenceSuperimposer ();
236+ MultipleSuperimposer imposer = new ReferenceSuperimposer (ref );
212237 imposer .superimpose (seed );
213238 return seed ;
214239 }
@@ -228,8 +253,8 @@ public MultipleAlignment align(List<Atom[]> atomArrays, Object params) throws St
228253 int seed = 0 ;
229254
230255 //Repeat the optimization 10 times in parallel, to obtain a more robust result.
231- for (int i =0 ; i <10 ; i ++){
232- Callable <MultipleAlignment > worker = new CeMcOptimizer (result , seed +i );
256+ for (int i =0 ; i <1 ; i ++){
257+ Callable <MultipleAlignment > worker = new CeMcOptimizer (result , seed +i , reference );
233258 Future <MultipleAlignment > submit = executor .submit (worker );
234259 afpFuture .add (submit );
235260 }
0 commit comments