2121package org .biojava .nbio .structure .asa ;
2222
2323import org .biojava .nbio .structure .*;
24+ import org .biojava .nbio .structure .contact .Contact ;
25+ import org .biojava .nbio .structure .contact .Grid ;
2426import org .slf4j .Logger ;
2527import org .slf4j .LoggerFactory ;
2628
2729import javax .vecmath .Point3d ;
28- import java .util .ArrayList ;
29- import java .util .TreeMap ;
30+ import java .util .*;
3031import java .util .concurrent .ExecutorService ;
3132import java .util .concurrent .Executors ;
3233
3637/**
3738 * Class to calculate Accessible Surface Areas based on
3839 * the rolling ball algorithm by Shrake and Rupley.
39- *
40+ * <p>
4041 * The code is adapted from a python implementation at http://boscoh.com/protein/asapy
4142 * (now source is available at https://github.com/boscoh/asa).
4243 * Thanks to Bosco K. Ho for a great piece of code and for his fantastic blog.
43- *
44+ * <p>
4445 * See
4546 * Shrake, A., and J. A. Rupley. "Environment and Exposure to Solvent of Protein Atoms.
4647 * Lysozyme and Insulin." JMB (1973) 79:351-371.
4748 * Lee, B., and Richards, F.M. "The interpretation of Protein Structures: Estimation of
4849 * Static Accessibility" JMB (1971) 55:379-400
49- * @author duarte_j
50+ *
51+ * @author Jose Duarte
5052 *
5153 */
5254public class AsaCalculator {
5355
5456 private static final Logger logger = LoggerFactory .getLogger (AsaCalculator .class );
5557
56- // Bosco uses as default 960, Shrake and Rupley seem to use in their paper 92 (not sure if this is actually the same parameter)
57- public static final int DEFAULT_N_SPHERE_POINTS = 960 ;
58+ /**
59+ * The default value for number of sphere points to sample.
60+ * See this paper for a nice study on the effect of this parameter: https://f1000research.com/articles/5-189/v1
61+ */
62+ public static final int DEFAULT_N_SPHERE_POINTS = 1000 ;
5863 public static final double DEFAULT_PROBE_SIZE = 1.4 ;
5964 public static final int DEFAULT_NTHREADS = 1 ;
6065
66+ private static final boolean DEFAULT_USE_SPATIAL_HASHING = true ;
67+
6168
6269
6370 // Chothia's amino acid atoms vdw radii
@@ -101,15 +108,19 @@ public void run() {
101108 private int nThreads ;
102109 private Point3d [] spherePoints ;
103110 private double cons ;
111+ private int [][] neighborIndices ;
112+
113+ private boolean useSpatialHashingForNeighbors ;
104114
105115 /**
106116 * Constructs a new AsaCalculator. Subsequently call {@link #calculateAsas()}
107117 * or {@link #getGroupAsas()} to calculate the ASAs
108118 * Only non-Hydrogen atoms are considered in the calculation.
109- * @param structure
110- * @param probe
111- * @param nSpherePoints
112- * @param nThreads
119+ * @param structure the structure, all non-H atoms will be used
120+ * @param probe the probe size
121+ * @param nSpherePoints the number of points to be used in generating the spherical
122+ * dot-density, the more points the more accurate (and slower) calculation
123+ * @param nThreads the number of parallel threads to use for the calculation
113124 * @param hetAtoms if true HET residues are considered, if false they aren't, equivalent to
114125 * NACCESS' -h option
115126 * @see StructureTools#getAllNonHAtomArray
@@ -120,16 +131,15 @@ public AsaCalculator(Structure structure, double probe, int nSpherePoints, int n
120131 this .probe = probe ;
121132 this .nThreads = nThreads ;
122133
134+ this .useSpatialHashingForNeighbors = DEFAULT_USE_SPATIAL_HASHING ;
135+
123136 // initialising the radii by looking them up through AtomRadii
124137 radii = new double [atomCoords .length ];
125138 for (int i =0 ;i <atomCoords .length ;i ++) {
126139 radii [i ] = getRadius (atoms [i ]);
127140 }
128141
129- // initialising the sphere points to sample
130- spherePoints = generateSpherePoints (nSpherePoints );
131-
132- cons = 4.0 * Math .PI / nSpherePoints ;
142+ initSpherePoints (nSpherePoints );
133143 }
134144
135145 /**
@@ -148,6 +158,8 @@ public AsaCalculator(Atom[] atoms, double probe, int nSpherePoints, int nThreads
148158 this .probe = probe ;
149159 this .nThreads = nThreads ;
150160
161+ this .useSpatialHashingForNeighbors = DEFAULT_USE_SPATIAL_HASHING ;
162+
151163 for (Atom atom :atoms ) {
152164 if (atom .getElement ()==Element .H )
153165 throw new IllegalArgumentException ("Can't calculate ASA for an array that contains Hydrogen atoms " );
@@ -159,10 +171,7 @@ public AsaCalculator(Atom[] atoms, double probe, int nSpherePoints, int nThreads
159171 radii [i ] = getRadius (atoms [i ]);
160172 }
161173
162- // initialising the sphere points to sample
163- spherePoints = generateSpherePoints (nSpherePoints );
164-
165- cons = 4.0 * Math .PI / nSpherePoints ;
174+ initSpherePoints (nSpherePoints );
166175 }
167176
168177 /**
@@ -188,12 +197,21 @@ public AsaCalculator(Point3d[] atomCoords, double probe, int nSpherePoints, int
188197 this .probe = probe ;
189198 this .nThreads = nThreads ;
190199
200+ this .useSpatialHashingForNeighbors = DEFAULT_USE_SPATIAL_HASHING ;
201+
191202 // initialising the radii to the given radius for all atoms
192203 radii = new double [atomCoords .length ];
193204 for (int i =0 ;i <atomCoords .length ;i ++) {
194205 radii [i ] = radius ;
195206 }
196207
208+ initSpherePoints (nSpherePoints );
209+ }
210+
211+ private void initSpherePoints (int nSpherePoints ) {
212+
213+ logger .debug ("Will use {} sphere points" , nSpherePoints );
214+
197215 // initialising the sphere points to sample
198216 spherePoints = generateSpherePoints (nSpherePoints );
199217
@@ -209,7 +227,7 @@ public AsaCalculator(Point3d[] atomCoords, double probe, int nSpherePoints, int
209227 */
210228 public GroupAsa [] getGroupAsas () {
211229
212- TreeMap <ResidueNumber , GroupAsa > asas = new TreeMap <ResidueNumber , GroupAsa >();
230+ TreeMap <ResidueNumber , GroupAsa > asas = new TreeMap <>();
213231
214232 double [] asasPerAtom = calculateAsas ();
215233
@@ -238,12 +256,26 @@ public double[] calculateAsas() {
238256
239257 double [] asas = new double [atomCoords .length ];
240258
259+ long start = System .currentTimeMillis ();
260+ if (useSpatialHashingForNeighbors ) {
261+ logger .debug ("Will use spatial hashing to find neighbors" );
262+ neighborIndices = findNeighborIndicesSpatialHashing ();
263+ } else {
264+ logger .debug ("Will not use spatial hashing to find neighbors" );
265+ neighborIndices = findNeighborIndices ();
266+ }
267+ long end = System .currentTimeMillis ();
268+ logger .debug ("Took {} s to find neighbors" , (end -start )/1000.0 );
269+
270+ start = System .currentTimeMillis ();
241271 if (nThreads <=1 ) { // (i.e. it will also be 1 thread if 0 or negative number specified)
272+ logger .debug ("Will use 1 thread for ASA calculation" );
242273 for (int i =0 ;i <atomCoords .length ;i ++) {
243274 asas [i ] = calcSingleAsa (i );
244275 }
245276
246277 } else {
278+ logger .debug ("Will use {} threads for ASA calculation" , nThreads );
247279 // NOTE the multithreaded calculation does not scale up well in some systems,
248280 // why? I guess some memory/garbage collect problem? I tried increasing Xmx in pc8201 but didn't help
249281
@@ -293,10 +325,22 @@ public double[] calculateAsas() {
293325 while (!threadPool .isTerminated ());
294326
295327 }
328+ end = System .currentTimeMillis ();
329+ logger .debug ("Took {} s to calculate all {} atoms ASAs (excluding neighbors calculation)" , (end -start )/1000.0 , atomCoords .length );
296330
297331 return asas ;
298332 }
299333
334+ /**
335+ * Set the useSpatialHashingForNeighbors flag to use spatial hashing to calculate neighbors (true) or all-to-all
336+ * distance calculation (false). Default is {@value DEFAULT_USE_SPATIAL_HASHING}.
337+ * Use for testing performance only.
338+ * @param useSpatialHashingForNeighbors the flag
339+ */
340+ void setUseSpatialHashingForNeighbors (boolean useSpatialHashingForNeighbors ) {
341+ this .useSpatialHashingForNeighbors = useSpatialHashingForNeighbors ;
342+ }
343+
300344 /**
301345 * Returns list of 3d coordinates of points on a sphere using the
302346 * Golden Section Spiral algorithm.
@@ -317,36 +361,118 @@ private Point3d[] generateSpherePoints(int nSpherePoints) {
317361 }
318362
319363 /**
320- * Returns list of indices of atoms within probe distance to atom k .
321- * @param k index of atom for which we want neighbor indices
364+ * Returns the 2-dimensional array with neighbor indices for every atom.
365+ * @return 2-dimensional array of size: n_atoms x n_neighbors_per_atom
322366 */
323- private Integer [] findNeighborIndices (int k ) {
367+ int [][] findNeighborIndices () {
368+
324369 // looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30
325- // Thus 40 seems to be a good compromise for the starting capacity
326- ArrayList <Integer > neighbor_indices = new ArrayList <>(40 );
370+ int initialCapacity = 60 ;
327371
328- double radius = radii [ k ] + probe + probe ;
372+ int [][] nbsIndices = new int [ atomCoords . length ][] ;
329373
330- for (int i =0 ;i <atomCoords .length ;i ++) {
331- if (i ==k ) continue ;
374+ for (int k =0 ; k <atomCoords .length ; k ++) {
375+ double radius = radii [k ] + probe + probe ;
376+
377+ List <Integer > thisNbIndices = new ArrayList <>(initialCapacity );
378+
379+ for (int i = 0 ; i < atomCoords .length ; i ++) {
380+ if (i == k ) continue ;
381+
382+ double dist = atomCoords [i ].distance (atomCoords [k ]);
383+
384+ if (dist < radius + radii [i ]) {
385+ thisNbIndices .add (i );
386+ }
387+ }
388+
389+ int [] indicesArray = new int [thisNbIndices .size ()];
390+ for (int i =0 ;i <thisNbIndices .size ();i ++) indicesArray [i ] = thisNbIndices .get (i );
391+ nbsIndices [k ] = indicesArray ;
392+ }
393+ return nbsIndices ;
394+ }
395+
396+ /**
397+ * Returns the 2-dimensional array with neighbor indices for every atom,
398+ * using spatial hashing to avoid all to all distance calculation.
399+ * @return 2-dimensional array of size: n_atoms x n_neighbors_per_atom
400+ */
401+ int [][] findNeighborIndicesSpatialHashing () {
332402
333- double dist = atomCoords [i ].distance (atomCoords [k ]);
403+ // looking at a typical protein case, number of neighbours are from ~10 to ~50, with an average of ~30
404+ int initialCapacity = 60 ;
405+
406+ List <Contact > contactList = calcContacts ();
407+ Map <Integer , List <Integer >> indices = new HashMap <>(atomCoords .length );
408+ for (Contact contact : contactList ) {
409+ // note contacts are stored 1-way only, with j>i
410+ int i = contact .getI ();
411+ int j = contact .getJ ();
412+
413+ List <Integer > iIndices ;
414+ List <Integer > jIndices ;
415+ if (!indices .containsKey (i )) {
416+ iIndices = new ArrayList <>(initialCapacity );
417+ indices .put (i , iIndices );
418+ } else {
419+ iIndices = indices .get (i );
420+ }
421+ if (!indices .containsKey (j )) {
422+ jIndices = new ArrayList <>(initialCapacity );
423+ indices .put (j , jIndices );
424+ } else {
425+ jIndices = indices .get (j );
426+ }
334427
335- if (dist < radius + radii [i ]) {
336- neighbor_indices .add (i );
428+ double radius = radii [i ] + probe + probe ;
429+ double dist = contact .getDistance ();
430+ if (dist < radius + radii [j ]) {
431+ iIndices .add (j );
432+ jIndices .add (i );
337433 }
434+ }
338435
436+ // convert map to array for fast access
437+ int [][] nbsIndices = new int [atomCoords .length ][];
438+ for (Map .Entry <Integer , List <Integer >> entry : indices .entrySet ()) {
439+ List <Integer > list = entry .getValue ();
440+ int [] indicesArray = new int [list .size ()];
441+ for (int i =0 ;i <entry .getValue ().size ();i ++) indicesArray [i ] = list .get (i );
442+ nbsIndices [entry .getKey ()] = indicesArray ;
339443 }
340444
341- Integer [] indicesArray = new Integer [neighbor_indices .size ()];
342- indicesArray = neighbor_indices .toArray (indicesArray );
343- return indicesArray ;
445+ return nbsIndices ;
446+ }
447+
448+ Point3d [] getAtomCoords () {
449+ return atomCoords ;
450+ }
451+
452+ private List <Contact > calcContacts () {
453+ double maxRadius = maxValue (radii );
454+ double cutoff = maxRadius + maxRadius + probe + probe ;
455+ logger .debug ("Max radius is {}, cutoff is {}" , maxRadius , cutoff );
456+ Grid grid = new Grid (cutoff );
457+ grid .addCoords (atomCoords );
458+ return grid .getIndicesContacts ();
459+ }
460+
461+ private static double maxValue (double [] array ) {
462+ double max = array [0 ];
463+ for (int i = 0 ; i < array .length ; i ++) {
464+ if (array [i ] > max ) {
465+ max = array [i ];
466+ }
467+ }
468+ return max ;
344469 }
345470
346471 private double calcSingleAsa (int i ) {
347472 Point3d atom_i = atomCoords [i ];
348- Integer [] neighbor_indices = findNeighborIndices (i );
349- int n_neighbor = neighbor_indices .length ;
473+
474+ int n_neighbor = neighborIndices [i ].length ;
475+ int [] neighbor_indices = neighborIndices [i ];
350476 int j_closest_neighbor = 0 ;
351477 double radius = probe + radii [i ];
352478
@@ -497,7 +623,7 @@ private static double getRadiusForNucl(NucleotideImpl nuc, Atom atom) {
497623 *
498624 * If atom is neither part of a nucleotide nor of a standard aminoacid,
499625 * the default vdw radius for the element is returned. If atom is of
500- * unknown type (element) the vdw radius of {@link # Element().N} is returned
626+ * unknown type (element) the vdw radius of {@link Element().N} is returned
501627 *
502628 * @param atom
503629 * @return
0 commit comments