2727import org .slf4j .LoggerFactory ;
2828
2929import javax .vecmath .Point3d ;
30+ import javax .vecmath .Vector3d ;
3031import java .util .*;
3132import java .util .concurrent .ExecutorService ;
3233import java .util .concurrent .Executors ;
3334
3435
35-
36-
3736/**
3837 * Class to calculate Accessible Surface Areas based on
3938 * the rolling ball algorithm by Shrake and Rupley.
4241 * (now source is available at https://github.com/boscoh/asa).
4342 * Thanks to Bosco K. Ho for a great piece of code and for his fantastic blog.
4443 * <p>
44+ * A few optimizations come from Eisenhaber et al, J Comp Chemistry 1994
45+ * (https://onlinelibrary.wiley.com/doi/epdf/10.1002/jcc.540160303)
46+ * <p>
4547 * See
4648 * Shrake, A., and J. A. Rupley. "Environment and Exposure to Solvent of Protein Atoms.
4749 * Lysozyme and Insulin." JMB (1973) 79:351-371.
4850 * Lee, B., and Richards, F.M. "The interpretation of Protein Structures: Estimation of
4951 * Static Accessibility" JMB (1971) 55:379-400
5052 *
5153 * @author Jose Duarte
52- *
5354 */
5455public class AsaCalculator {
5556
@@ -86,10 +87,10 @@ public class AsaCalculator {
8687
8788 private class AsaCalcWorker implements Runnable {
8889
89- private int i ;
90- private double [] asas ;
90+ private final int i ;
91+ private final double [] asas ;
9192
92- public AsaCalcWorker (int i , double [] asas ) {
93+ private AsaCalcWorker (int i , double [] asas ) {
9394 this .i = i ;
9495 this .asas = asas ;
9596 }
@@ -100,15 +101,24 @@ public void run() {
100101 }
101102 }
102103
104+ static class IndexAndDistance {
105+ final int index ;
106+ final double dist ;
107+ IndexAndDistance (int index , double dist ) {
108+ this .index = index ;
109+ this .dist = dist ;
110+ }
111+ }
112+
103113
104- private Point3d [] atomCoords ;
105- private Atom [] atoms ;
106- private double [] radii ;
107- private double probe ;
108- private int nThreads ;
109- private Point3d [] spherePoints ;
114+ private final Point3d [] atomCoords ;
115+ private final Atom [] atoms ;
116+ private final double [] radii ;
117+ private final double probe ;
118+ private final int nThreads ;
119+ private Vector3d [] spherePoints ;
110120 private double cons ;
111- private int [][] neighborIndices ;
121+ private IndexAndDistance [][] neighborIndices ;
112122
113123 private boolean useSpatialHashingForNeighbors ;
114124
@@ -123,7 +133,6 @@ public void run() {
123133 * @param nThreads the number of parallel threads to use for the calculation
124134 * @param hetAtoms if true HET residues are considered, if false they aren't, equivalent to
125135 * NACCESS' -h option
126- * @see StructureTools#getAllNonHAtomArray
127136 */
128137 public AsaCalculator (Structure structure , double probe , int nSpherePoints , int nThreads , boolean hetAtoms ) {
129138 this .atoms = StructureTools .getAllNonHAtomArray (structure , hetAtoms );
@@ -243,7 +252,7 @@ public GroupAsa[] getGroupAsas() {
243252 }
244253 }
245254
246- return asas .values ().toArray (new GroupAsa [asas . size () ]);
255+ return asas .values ().toArray (new GroupAsa [0 ]);
247256 }
248257
249258 /**
@@ -276,46 +285,9 @@ public double[] calculateAsas() {
276285
277286 } else {
278287 logger .debug ("Will use {} threads for ASA calculation" , nThreads );
279- // NOTE the multithreaded calculation does not scale up well in some systems,
280- // why? I guess some memory/garbage collect problem? I tried increasing Xmx in pc8201 but didn't help
281-
282- // Following scaling tests are for 3hbx, calculating ASA of full asym unit (6 chains):
283-
284- // SCALING test done in merlinl01 (12 cores, Xeon X5670 @ 2.93GHz, 24GB RAM)
285- //1 threads, time: 8.8s -- x1.0
286- //2 threads, time: 4.4s -- x2.0
287- //3 threads, time: 2.9s -- x3.0
288- //4 threads, time: 2.2s -- x3.9
289- //5 threads, time: 1.8s -- x4.9
290- //6 threads, time: 1.6s -- x5.5
291- //7 threads, time: 1.4s -- x6.5
292- //8 threads, time: 1.3s -- x6.9
293-
294- // SCALING test done in pc8201 (4 cores, Core2 Quad Q9550 @ 2.83GHz, 8GB RAM)
295- //1 threads, time: 17.2s -- x1.0
296- //2 threads, time: 9.7s -- x1.8
297- //3 threads, time: 7.7s -- x2.2
298- //4 threads, time: 7.9s -- x2.2
299-
300- // SCALING test done in eppic01 (16 cores, Xeon E5-2650 0 @ 2.00GHz, 128GB RAM)
301- //1 threads, time: 10.7s -- x1.0
302- //2 threads, time: 5.6s -- x1.9
303- //3 threads, time: 3.6s -- x3.0
304- //4 threads, time: 2.8s -- x3.9
305- //5 threads, time: 2.3s -- x4.8
306- //6 threads, time: 1.8s -- x6.0
307- //7 threads, time: 1.6s -- x6.8
308- //8 threads, time: 1.3s -- x8.0
309- //9 threads, time: 1.3s -- x8.5
310- //10 threads, time: 1.1s -- x10.0
311- //11 threads, time: 1.0s -- x10.9
312- //12 threads, time: 0.9s -- x11.4
313-
314-
315288
316289 ExecutorService threadPool = Executors .newFixedThreadPool (nThreads );
317290
318-
319291 for (int i =0 ;i <atomCoords .length ;i ++) {
320292 threadPool .submit (new AsaCalcWorker (i ,asas ));
321293 }
@@ -342,20 +314,20 @@ void setUseSpatialHashingForNeighbors(boolean useSpatialHashingForNeighbors) {
342314 }
343315
344316 /**
345- * Returns list of 3d coordinates of points on a sphere using the
317+ * Returns list of 3d coordinates of points on a unit sphere using the
346318 * Golden Section Spiral algorithm.
347319 * @param nSpherePoints the number of points to be used in generating the spherical dot-density
348- * @return
320+ * @return the array of points as Vector3d objects
349321 */
350- private Point3d [] generateSpherePoints (int nSpherePoints ) {
351- Point3d [] points = new Point3d [nSpherePoints ];
322+ private Vector3d [] generateSpherePoints (int nSpherePoints ) {
323+ Vector3d [] points = new Vector3d [nSpherePoints ];
352324 double inc = Math .PI * (3.0 - Math .sqrt (5.0 ));
353325 double offset = 2.0 / nSpherePoints ;
354326 for (int k =0 ;k <nSpherePoints ;k ++) {
355327 double y = k * offset - 1.0 + (offset / 2.0 );
356328 double r = Math .sqrt (1.0 - y *y );
357329 double phi = k * inc ;
358- points [k ] = new Point3d (Math .cos (phi )*r , y , Math .sin (phi )*r );
330+ points [k ] = new Vector3d (Math .cos (phi )*r , y , Math .sin (phi )*r );
359331 }
360332 return points ;
361333 }
@@ -364,30 +336,29 @@ private Point3d[] generateSpherePoints(int nSpherePoints) {
364336 * Returns the 2-dimensional array with neighbor indices for every atom.
365337 * @return 2-dimensional array of size: n_atoms x n_neighbors_per_atom
366338 */
367- int [][] findNeighborIndices () {
339+ IndexAndDistance [][] findNeighborIndices () {
368340
369341 // looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30
370342 int initialCapacity = 60 ;
371343
372- int [][] nbsIndices = new int [atomCoords .length ][];
344+ IndexAndDistance [][] nbsIndices = new IndexAndDistance [atomCoords .length ][];
373345
374346 for (int k =0 ; k <atomCoords .length ; k ++) {
375347 double radius = radii [k ] + probe + probe ;
376348
377- List <Integer > thisNbIndices = new ArrayList <>(initialCapacity );
349+ List <IndexAndDistance > thisNbIndices = new ArrayList <>(initialCapacity );
378350
379351 for (int i = 0 ; i < atomCoords .length ; i ++) {
380352 if (i == k ) continue ;
381353
382354 double dist = atomCoords [i ].distance (atomCoords [k ]);
383355
384356 if (dist < radius + radii [i ]) {
385- thisNbIndices .add (i );
357+ thisNbIndices .add (new IndexAndDistance ( i , dist ) );
386358 }
387359 }
388360
389- int [] indicesArray = new int [thisNbIndices .size ()];
390- for (int i =0 ;i <thisNbIndices .size ();i ++) indicesArray [i ] = thisNbIndices .get (i );
361+ IndexAndDistance [] indicesArray = thisNbIndices .toArray (new IndexAndDistance [0 ]);
391362 nbsIndices [k ] = indicesArray ;
392363 }
393364 return nbsIndices ;
@@ -398,20 +369,20 @@ int[][] findNeighborIndices() {
398369 * using spatial hashing to avoid all to all distance calculation.
399370 * @return 2-dimensional array of size: n_atoms x n_neighbors_per_atom
400371 */
401- int [][] findNeighborIndicesSpatialHashing () {
372+ IndexAndDistance [][] findNeighborIndicesSpatialHashing () {
402373
403374 // looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30
404375 int initialCapacity = 60 ;
405376
406377 List <Contact > contactList = calcContacts ();
407- Map <Integer , List <Integer >> indices = new HashMap <>(atomCoords .length );
378+ Map <Integer , List <IndexAndDistance >> indices = new HashMap <>(atomCoords .length );
408379 for (Contact contact : contactList ) {
409380 // note contacts are stored 1-way only, with j>i
410381 int i = contact .getI ();
411382 int j = contact .getJ ();
412383
413- List <Integer > iIndices ;
414- List <Integer > jIndices ;
384+ List <IndexAndDistance > iIndices ;
385+ List <IndexAndDistance > jIndices ;
415386 if (!indices .containsKey (i )) {
416387 iIndices = new ArrayList <>(initialCapacity );
417388 indices .put (i , iIndices );
@@ -428,24 +399,23 @@ int[][] findNeighborIndicesSpatialHashing() {
428399 double radius = radii [i ] + probe + probe ;
429400 double dist = contact .getDistance ();
430401 if (dist < radius + radii [j ]) {
431- iIndices .add (j );
432- jIndices .add (i );
402+ iIndices .add (new IndexAndDistance ( j , dist ) );
403+ jIndices .add (new IndexAndDistance ( i , dist ) );
433404 }
434405 }
435406
436407 // convert map to array for fast access
437- int [][] nbsIndices = new int [atomCoords .length ][];
438- for (Map .Entry <Integer , List <Integer >> entry : indices .entrySet ()) {
439- List <Integer > list = entry .getValue ();
440- int [] indicesArray = new int [list .size ()];
441- for (int i =0 ;i <entry .getValue ().size ();i ++) indicesArray [i ] = list .get (i );
442- nbsIndices [entry .getKey ()] = indicesArray ;
408+ IndexAndDistance [][] nbsIndices = new IndexAndDistance [atomCoords .length ][];
409+ for (Map .Entry <Integer , List <IndexAndDistance >> entry : indices .entrySet ()) {
410+ List <IndexAndDistance > list = entry .getValue ();
411+ IndexAndDistance [] indexAndDistances = list .toArray (new IndexAndDistance [0 ]);
412+ nbsIndices [entry .getKey ()] = indexAndDistances ;
443413 }
444414
445415 // important: some atoms might have no neighbors at all: we need to initialise to empty arrays
446416 for (int i =0 ; i <nbsIndices .length ; i ++) {
447417 if (nbsIndices [i ] == null ) {
448- nbsIndices [i ] = new int [0 ];
418+ nbsIndices [i ] = new IndexAndDistance [0 ];
449419 }
450420 }
451421
@@ -474,35 +444,51 @@ private double calcSingleAsa(int i) {
474444 Point3d atom_i = atomCoords [i ];
475445
476446 int n_neighbor = neighborIndices [i ].length ;
477- int [] neighbor_indices = neighborIndices [i ];
478- int j_closest_neighbor = 0 ;
479- double radius = probe + radii [i ];
447+ IndexAndDistance [] neighbor_indices = neighborIndices [i ];
448+ // Sorting by closest to farthest away neighbors achieves faster runtimes when checking for occluded
449+ // sphere sample points below. This follows the ideas exposed in
450+ // Eisenhaber et al, J Comp Chemistry 1994 (https://onlinelibrary.wiley.com/doi/epdf/10.1002/jcc.540160303)
451+ // This is essential for performance. In my tests this brings down the number of occlusion checks in loop below to
452+ // an average of n_sphere_points/10 per atom i, producing ~ x4 performance gain overall
453+ Arrays .sort (neighbor_indices , Comparator .comparingDouble (o -> o .dist ));
454+
455+ double radius_i = probe + radii [i ];
480456
481457 int n_accessible_point = 0 ;
458+ // purely for debugging
459+ int [] numDistsCalced = null ;
460+ if (logger .isDebugEnabled ()) numDistsCalced = new int [n_neighbor ];
461+
462+ // now we precalculate anything depending only on i,j in equation 3 in Eisenhaber 1994
463+ double [] sqRadii = new double [n_neighbor ];
464+ Vector3d [] aj_minus_ais = new Vector3d [n_neighbor ];
465+ for (int nbArrayInd =0 ; nbArrayInd <n_neighbor ; nbArrayInd ++) {
466+ int j = neighbor_indices [nbArrayInd ].index ;
467+ double dist = neighbor_indices [nbArrayInd ].dist ;
468+ double radius_j = radii [j ] + probe ;
469+ // see equation 3 in Eisenhaber 1994
470+ sqRadii [nbArrayInd ] = (dist *dist + radius_i *radius_i - radius_j *radius_j )/(2 *radius_i );
471+ Vector3d aj_minus_ai = new Vector3d (atomCoords [j ]);
472+ aj_minus_ai .sub (atom_i );
473+ aj_minus_ais [nbArrayInd ] = aj_minus_ai ;
474+ }
482475
483- for (Point3d point : spherePoints ){
476+ for (Vector3d point : spherePoints ){
484477 boolean is_accessible = true ;
485- Point3d test_point = new Point3d (point .x *radius + atom_i .x ,
486- point .y *radius + atom_i .y ,
487- point .z *radius + atom_i .z );
488-
489- int [] cycled_indices = new int [n_neighbor ];
490- int arind = 0 ;
491- for (int ind =j_closest_neighbor ;ind <n_neighbor ;ind ++) {
492- cycled_indices [arind ] = ind ;
493- arind ++;
494- }
495- for (int ind =0 ;ind <j_closest_neighbor ;ind ++){
496- cycled_indices [arind ] = ind ;
497- arind ++;
498- }
499478
500- for (int j : cycled_indices ) {
501- Point3d atom_j = atomCoords [neighbor_indices [j ]];
502- double r = radii [neighbor_indices [j ]] + probe ;
503- double diff_sq = test_point .distanceSquared (atom_j );
504- if (diff_sq < r *r ) {
505- j_closest_neighbor = j ;
479+ // note that the neighbors are sorted by distance, achieving optimal performance in this inner loop
480+ // See Eisenhaber et al, J Comp Chemistry 1994
481+
482+ for (int nbArrayInd =0 ; nbArrayInd <n_neighbor ; nbArrayInd ++) {
483+
484+ // see equation 3 in Eisenhaber 1994. This is slightly more efficient than
485+ // calculating distances to the actual sphere points on atom_i (which would be obtained with:
486+ // Point3d test_point = new Point3d(point.x*radius + atom_i.x,point.y*radius + atom_i.y,point.z*radius + atom_i.z))
487+ double dotProd = aj_minus_ais [nbArrayInd ].dot (point );
488+
489+ if (numDistsCalced !=null ) numDistsCalced [nbArrayInd ]++;
490+
491+ if (dotProd > sqRadii [nbArrayInd ]) {
506492 is_accessible = false ;
507493 break ;
508494 }
@@ -511,7 +497,15 @@ private double calcSingleAsa(int i) {
511497 n_accessible_point ++;
512498 }
513499 }
514- return cons *n_accessible_point *radius *radius ;
500+
501+ // purely for debugging
502+ if (numDistsCalced !=null ) {
503+ int sum = 0 ;
504+ for (int numDistCalcedForJ : numDistsCalced ) sum += numDistCalcedForJ ;
505+ logger .debug ("Number of sample points distances calculated for neighbors of i={} : average {}, all {}" , i , (double ) sum / (double ) n_neighbor , numDistsCalced );
506+ }
507+
508+ return cons *n_accessible_point *radius_i *radius_i ;
515509 }
516510
517511 /**
@@ -577,16 +571,15 @@ else if (atomCode.equals("CA") || atomCode.equals("CB") ||
577571 else if (atomCode .equals ("CG" )) return TETRAHEDRAL_CARBON_VDW ;
578572
579573 default :
580- logger .info ("Unexpected carbon atom " + atomCode + " for aminoacid " + aa + " , assigning its standard vdw radius" );
574+ logger .info ("Unexpected carbon atom {} for aminoacid {} , assigning its standard vdw radius" , atomCode , aa );
581575 return Element .C .getVDWRadius ();
582576 }
583577 }
584578
585579 // not any of the expected atoms
586580 } else {
587581 // non standard aas, (e.g. MSE, LLP) will always have this problem,
588- logger .info ("Unexpected atom " +atomCode +" for aminoacid " +aa + " (" +amino .getPDBName ()+"), assigning its standard vdw radius" );
589-
582+ logger .debug ("Unexpected atom {} for aminoacid {} ({}), assigning its standard vdw radius" , atomCode , aa , amino .getPDBName ());
590583
591584 return atom .getElement ().getVDWRadius ();
592585 }
0 commit comments