3131import org .biojava .nbio .core .sequence .ProteinSequence ;
3232import org .biojava .nbio .core .sequence .compound .AminoAcidCompound ;
3333import org .biojava .nbio .structure .Atom ;
34+ import org .biojava .nbio .structure .Chain ;
35+ import org .biojava .nbio .structure .Structure ;
3436import org .biojava .nbio .structure .StructureException ;
3537import org .biojava .nbio .structure .align .StructureAlignment ;
3638import org .biojava .nbio .structure .align .StructureAlignmentFactory ;
4547import org .biojava .nbio .structure .align .multiple .MultipleAlignmentImpl ;
4648import org .biojava .nbio .structure .align .multiple .util .MultipleAlignmentScorer ;
4749import org .biojava .nbio .structure .align .multiple .util .ReferenceSuperimposer ;
50+ import org .biojava .nbio .structure .quaternary .BiologicalAssemblyBuilder ;
4851import org .biojava .nbio .structure .symmetry .core .QuatSymmetrySubunits ;
4952import org .biojava .nbio .structure .symmetry .internal .CESymmParameters ;
5053import org .biojava .nbio .structure .symmetry .internal .CeSymm ;
7174 */
7275public class SubunitCluster {
7376
74- private static final Logger logger = LoggerFactory
75- .getLogger (SubunitCluster .class );
77+ private static final Logger logger = LoggerFactory .getLogger (SubunitCluster .class );
7678
77- private List <Subunit > subunits = new ArrayList <Subunit >();
78- private List <List <Integer >> subunitEQR = new ArrayList <List < Integer > >();
79+ private List <Subunit > subunits = new ArrayList <>();
80+ private List <List <Integer >> subunitEQR = new ArrayList <>();
7981 private int representative = -1 ;
8082
8183 private SubunitClustererMethod method = SubunitClustererMethod .SEQUENCE ;
@@ -119,7 +121,7 @@ public SubunitCluster(Subunit subunit) {
119121
120122 subunits .add (subunit );
121123
122- List <Integer > identity = new ArrayList <Integer >();
124+ List <Integer > identity = new ArrayList <>();
123125 for (int i = 0 ; i < subunit .size (); i ++)
124126 identity .add (i );
125127 subunitEQR .add (identity );
@@ -179,6 +181,38 @@ public boolean isIdenticalTo(SubunitCluster other) {
179181 return thisSequence .equals (otherSequence );
180182 }
181183
184+ /**
185+ * Tells whether the other SubunitCluster contains exactly the same Subunit.
186+ * This is checked by equality of their entity identifiers if they are present.
187+ *
188+ * @param other
189+ * SubunitCluster
190+ * @return true if the SubunitClusters are identical, false otherwise
191+ */
192+ public boolean isIdenticalByEntityIdTo (SubunitCluster other ) {
193+ Structure thisStruct = this .subunits .get (this .representative ).getStructure ();
194+ Structure otherStruct = other .subunits .get (other .representative ).getStructure ();
195+ String thisName = this .subunits .get (this .representative ).getName ();
196+ String otherName = other .subunits .get (this .representative ).getName ();
197+ Chain thisChain = thisStruct .getChain (thisName );
198+ Chain otherChain = otherStruct .getChain (otherName );
199+ if (thisChain == null || otherChain == null ) {
200+ logger .info ("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id" ,
201+ this .subunits .get (this .representative ).getName (),
202+ other .subunits .get (other .representative ).getName ());
203+ return false ;
204+ }
205+ if (thisChain .getEntityInfo () == null || otherChain .getEntityInfo () == null ) {
206+ logger .info ("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id" ,
207+ this .subunits .get (this .representative ).getName (),
208+ other .subunits .get (other .representative ).getName ());
209+ return false ;
210+ }
211+ int thisEntityId = thisChain .getEntityInfo ().getMolId ();
212+ int otherEntityId = otherChain .getEntityInfo ().getMolId ();
213+ return thisEntityId == otherEntityId ;
214+ }
215+
182216 /**
183217 * Merges the other SubunitCluster into this one if it contains exactly the
184218 * same Subunit. This is checked by {@link #isIdenticalTo(SubunitCluster)}.
@@ -192,7 +226,35 @@ public boolean mergeIdentical(SubunitCluster other) {
192226 if (!isIdenticalTo (other ))
193227 return false ;
194228
195- logger .info ("SubunitClusters are identical" );
229+ logger .info ("SubunitClusters {}-{} are identical in sequence" ,
230+ this .subunits .get (this .representative ).getName (),
231+ other .subunits .get (other .representative ).getName ());
232+
233+ this .subunits .addAll (other .subunits );
234+ this .subunitEQR .addAll (other .subunitEQR );
235+
236+ return true ;
237+ }
238+
239+ /**
240+ * Merges the other SubunitCluster into this one if it contains exactly the
241+ * same Subunit. This is checked by comparing the entity identifiers of the subunits
242+ * if one can be found.
243+ * Thus this only makes sense when the subunits are complete chains of a
244+ * deposited PDB entry. I
245+ *
246+ * @param other
247+ * SubunitCluster
248+ * @return true if the SubunitClusters were merged, false otherwise
249+ */
250+ public boolean mergeIdenticalByEntityId (SubunitCluster other ) {
251+
252+ if (!isIdenticalByEntityIdTo (other ))
253+ return false ;
254+
255+ logger .info ("SubunitClusters {}-{} belong to same entity. Assuming they are identical" ,
256+ this .subunits .get (this .representative ).getName (),
257+ other .subunits .get (other .representative ).getName ());
196258
197259 this .subunits .addAll (other .subunits );
198260 this .subunitEQR .addAll (other .subunitEQR );
@@ -296,13 +358,15 @@ public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters pa
296358 return false ;
297359 }
298360
299- logger .info (String .format ("SubunitClusters are similar in sequence "
300- + "with %.2f sequence identity and %.2f coverage" , sequenceIdentity ,
301- sequenceCoverage ));
361+ logger .info (String .format ("SubunitClusters %s-%s are similar in sequence "
362+ + "with %.2f sequence identity and %.2f coverage" ,
363+ this .subunits .get (this .representative ).getName (),
364+ other .subunits .get (other .representative ).getName (),
365+ sequenceIdentity , sequenceCoverage ));
302366
303367 // If coverage and sequence identity sufficient, merge other and this
304- List <Integer > thisAligned = new ArrayList <Integer >();
305- List <Integer > otherAligned = new ArrayList <Integer >();
368+ List <Integer > thisAligned = new ArrayList <>();
369+ List <Integer > otherAligned = new ArrayList <>();
306370
307371 // Extract the aligned residues of both Subunit
308372 for (int p = 1 ; p < aligner .getPair ().getLength () + 1 ; p ++) {
@@ -318,60 +382,15 @@ public boolean mergeSequence(SubunitCluster other, SubunitClustererParameters pa
318382
319383 // Only consider residues that are part of the SubunitCluster
320384 if (this .subunitEQR .get (this .representative ).contains (thisIndex )
321- && other .subunitEQR .get (other .representative ).contains (
322- otherIndex )) {
385+ && other .subunitEQR .get (other .representative ).contains (otherIndex )) {
323386 thisAligned .add (thisIndex );
324387 otherAligned .add (otherIndex );
325388 }
326389 }
327390
328- // Do a List intersection to find out which EQR columns to remove
329- List <Integer > thisRemove = new ArrayList <Integer >();
330- List <Integer > otherRemove = new ArrayList <Integer >();
331-
332- for (int t = 0 ; t < this .subunitEQR .get (this .representative ).size (); t ++) {
333- // If the index is aligned do nothing, otherwise mark as removing
334- if (!thisAligned .contains (this .subunitEQR .get (this .representative )
335- .get (t )))
336- thisRemove .add (t );
337- }
338-
339- for (int t = 0 ; t < other .subunitEQR .get (other .representative ).size (); t ++) {
340- // If the index is aligned do nothing, otherwise mark as removing
341- if (!otherAligned .contains (other .subunitEQR .get (
342- other .representative ).get (t )))
343- otherRemove .add (t );
344- }
345- // Now remove unaligned columns, from end to start
346- Collections .sort (thisRemove );
347- Collections .reverse (thisRemove );
348- Collections .sort (otherRemove );
349- Collections .reverse (otherRemove );
350-
351- for (int t = 0 ; t < thisRemove .size (); t ++) {
352- for (List <Integer > eqr : this .subunitEQR ) {
353- int column = thisRemove .get (t );
354- eqr .remove (column );
355- }
356- }
357-
358- for (int t = 0 ; t < otherRemove .size (); t ++) {
359- for (List <Integer > eqr : other .subunitEQR ) {
360- int column = otherRemove .get (t );
361- eqr .remove (column );
362- }
363- }
364-
365- // The representative is the longest sequence
366- if (this .subunits .get (this .representative ).size () < other .subunits .get (
367- other .representative ).size ())
368- this .representative = other .representative + subunits .size ();
369-
370- this .subunits .addAll (other .subunits );
371- this .subunitEQR .addAll (other .subunitEQR );
391+ updateEquivResidues (other , thisAligned , otherAligned );
372392
373393 this .method = SubunitClustererMethod .SEQUENCE ;
374-
375394 pseudoStoichiometric = !params .isHighConfidenceScores (sequenceIdentity ,sequenceCoverage );
376395
377396 return true ;
@@ -445,8 +464,8 @@ public boolean mergeStructure(SubunitCluster other, SubunitClustererParameters p
445464
446465 // Merge clusters
447466 List <List <Integer >> alignedRes = msa .getBlock (0 ).getAlignRes ();
448- List <Integer > thisAligned = new ArrayList <Integer >();
449- List <Integer > otherAligned = new ArrayList <Integer >();
467+ List <Integer > thisAligned = new ArrayList <>();
468+ List <Integer > otherAligned = new ArrayList <>();
450469
451470 // Extract the aligned residues of both Subunit
452471 for (int p = 0 ; p < msa .length (); p ++) {
@@ -469,24 +488,30 @@ public boolean mergeStructure(SubunitCluster other, SubunitClustererParameters p
469488 }
470489 }
471490
491+ updateEquivResidues (other , thisAligned , otherAligned );
492+
493+ this .method = SubunitClustererMethod .STRUCTURE ;
494+ pseudoStoichiometric = true ;
495+
496+ return true ;
497+ }
498+
499+ private void updateEquivResidues (SubunitCluster other , List <Integer > thisAligned , List <Integer > otherAligned ) {
472500 // Do a List intersection to find out which EQR columns to remove
473- List <Integer > thisRemove = new ArrayList <Integer >();
474- List <Integer > otherRemove = new ArrayList <Integer >();
501+ List <Integer > thisRemove = new ArrayList <>();
502+ List <Integer > otherRemove = new ArrayList <>();
475503
476504 for (int t = 0 ; t < this .subunitEQR .get (this .representative ).size (); t ++) {
477505 // If the index is aligned do nothing, otherwise mark as removing
478- if (!thisAligned .contains (this .subunitEQR .get (this .representative )
479- .get (t )))
506+ if (!thisAligned .contains (this .subunitEQR .get (this .representative ).get (t )))
480507 thisRemove .add (t );
481508 }
482509
483510 for (int t = 0 ; t < other .subunitEQR .get (other .representative ).size (); t ++) {
484511 // If the index is aligned do nothing, otherwise mark as removing
485- if (!otherAligned .contains (other .subunitEQR .get (
486- other .representative ).get (t )))
512+ if (!otherAligned .contains (other .subunitEQR .get (other .representative ).get (t )))
487513 otherRemove .add (t );
488514 }
489-
490515 // Now remove unaligned columns, from end to start
491516 Collections .sort (thisRemove );
492517 Collections .reverse (thisRemove );
@@ -508,17 +533,12 @@ public boolean mergeStructure(SubunitCluster other, SubunitClustererParameters p
508533 }
509534
510535 // The representative is the longest sequence
511- if (this .subunits .get (this .representative ).size () < other .subunits .get (
512- other .representative ).size ())
536+ if (this .subunits .get (this .representative ).size () < other .subunits .get (other .representative ).size ())
513537 this .representative = other .representative + subunits .size ();
514538
515539 this .subunits .addAll (other .subunits );
516540 this .subunitEQR .addAll (other .subunitEQR );
517541
518- this .method = SubunitClustererMethod .STRUCTURE ;
519- pseudoStoichiometric = true ;
520-
521- return true ;
522542 }
523543
524544 /**
@@ -568,9 +588,9 @@ public boolean divideInternally(SubunitClustererParameters clusterParams)
568588 List <List <Integer >> alignedRes = result .getMultipleAlignment ()
569589 .getBlock (0 ).getAlignRes ();
570590
571- List <List <Integer >> columns = new ArrayList <List < Integer > >();
591+ List <List <Integer >> columns = new ArrayList <>();
572592 for (int s = 0 ; s < alignedRes .size (); s ++)
573- columns .add (new ArrayList <Integer >(alignedRes .get (s ).size ()));
593+ columns .add (new ArrayList <>(alignedRes .get (s ).size ()));
574594
575595 // Extract the aligned columns of each repeat in the Subunit
576596 for (int col = 0 ; col < alignedRes .get (0 ).size (); col ++) {
0 commit comments