Skip to content

Commit 7e53ea7

Browse files
committed
Merge branch 'master' of github.com:biojava/biojava
# By Matt Larson (2) and lafita (2)
# Via Jose Manuel Duarte (2) and Matt Larson (1)
* 'master' of github.com:biojava/biojava:
  Fix NPE in the mmCIF parser when provided zero-length files.
  Adding TER records to pdb file export.
  Change print optimization history format in SymmOptimizer
  Create method to update symmetry scores
2 parents 373dd96 + f80ac2a commit 7e53ea7

6 files changed

Lines changed: 150 additions & 66 deletions

File tree

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/PDBFileParserTest.java

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,31 @@
2424
*/
2525
package org.biojava.nbio.structure.test;
2626

27-
import org.biojava.nbio.structure.*;
28-
import org.biojava.nbio.structure.align.util.AtomCache;
29-
import org.biojava.nbio.structure.io.FileParsingParameters;
30-
import org.biojava.nbio.structure.io.PDBFileParser;
31-
import org.biojava.nbio.structure.test.util.StringManipulationTestsHelper;
32-
import org.junit.Before;
33-
import org.junit.Test;
27+
import static org.junit.Assert.assertEquals;
28+
import static org.junit.Assert.assertNull;
29+
import static org.junit.Assert.assertTrue;
3430

3531
import java.io.BufferedReader;
3632
import java.io.IOException;
3733
import java.io.InputStream;
3834
import java.io.StringReader;
3935

40-
import static org.junit.Assert.*;
36+
import org.biojava.nbio.structure.Atom;
37+
import org.biojava.nbio.structure.Chain;
38+
import org.biojava.nbio.structure.ChainImpl;
39+
import org.biojava.nbio.structure.Group;
40+
import org.biojava.nbio.structure.JournalArticle;
41+
import org.biojava.nbio.structure.PDBHeader;
42+
import org.biojava.nbio.structure.Site;
43+
import org.biojava.nbio.structure.Structure;
44+
import org.biojava.nbio.structure.StructureException;
45+
import org.biojava.nbio.structure.StructureTools;
46+
import org.biojava.nbio.structure.align.util.AtomCache;
47+
import org.biojava.nbio.structure.io.FileParsingParameters;
48+
import org.biojava.nbio.structure.io.PDBFileParser;
49+
import org.biojava.nbio.structure.test.util.StringManipulationTestsHelper;
50+
import org.junit.Before;
51+
import org.junit.Test;
4152

4253
public class PDBFileParserTest {
4354

@@ -55,7 +66,8 @@ public void test2LetterResidueName() throws IOException {
5566
// from 1a4w:
5667
String t =
5768
"HETATM 2242 NA NA L 541 5.845 -14.122 30.560 0.88 23.48 NA"+newline+
58-
"HETATM 2243 NA NA L 542 18.411 -16.475 38.464 0.88 24.77 NA"+newline;
69+
"HETATM 2243 NA NA L 542 18.411 -16.475 38.464 0.88 24.77 NA"+newline+
70+
"TER"+newline;
5971
BufferedReader br = new BufferedReader(new StringReader(t));
6072
Structure s = parser.parsePDBFile(br);
6173
String pdb = s.toPDB();
@@ -85,7 +97,8 @@ public void testCorrectFloatingPointDisplay() throws IOException {
8597
"ATOM 11 C CYS L 1 9.580 14.524 21.006 1.00 18.64 C"+newline+
8698
"ATOM 12 O CYS L 1 9.110 15.220 21.912 1.00 19.03 O"+newline+
8799
"ATOM 13 CB CYS L 1 12.117 14.468 20.771 1.00 21.77 C"+newline+
88-
"ATOM 14 SG CYS L 1 12.247 14.885 22.538 1.00 20.55 S"+newline;
100+
"ATOM 14 SG CYS L 1 12.247 14.885 22.538 1.00 20.55 S"+newline+
101+
"TER"+newline;
89102

90103
BufferedReader br = new BufferedReader(new StringReader(t));
91104

@@ -527,7 +540,8 @@ public void testCorrectAtomNamePadding() throws IOException {
527540
"HETATM 2282 C22 QWE H 373 16.864 -13.556 14.739 1.00 42.63 C"+newline+
528541
"HETATM 2283 C2'1 QWE H 373 16.825 -12.903 16.107 1.00 40.59 C"+newline+
529542
"HETATM 2284 C42 QWE H 373 18.146 -14.734 13.451 1.00 43.96 C"+newline+
530-
"HETATM 2285 N3 QWE H 373 18.049 -13.554 14.106 1.00 43.46 N"+newline;
543+
"HETATM 2285 N3 QWE H 373 18.049 -13.554 14.106 1.00 43.46 N"+newline+
544+
"TER"+newline;
531545

532546
BufferedReader br = new BufferedReader(new StringReader(atomLines));
533547

biojava-structure/src/main/java/org/biojava/nbio/structure/io/FileConvert.java

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,6 @@
2121
*/
2222
package org.biojava.nbio.structure.io;
2323

24-
import org.biojava.nbio.structure.*;
25-
import org.biojava.nbio.structure.io.mmcif.MMCIFFileTools;
26-
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
27-
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
28-
import org.biojava.nbio.core.util.XMLWriter;
29-
3024
import java.io.IOException;
3125
import java.text.DateFormat;
3226
import java.text.DecimalFormat;
@@ -37,6 +31,20 @@
3731
import java.util.Locale;
3832
import java.util.Map;
3933

34+
import org.biojava.nbio.core.util.XMLWriter;
35+
import org.biojava.nbio.structure.Atom;
36+
import org.biojava.nbio.structure.Chain;
37+
import org.biojava.nbio.structure.DBRef;
38+
import org.biojava.nbio.structure.Element;
39+
import org.biojava.nbio.structure.Group;
40+
import org.biojava.nbio.structure.GroupType;
41+
import org.biojava.nbio.structure.PDBHeader;
42+
import org.biojava.nbio.structure.SSBond;
43+
import org.biojava.nbio.structure.Site;
44+
import org.biojava.nbio.structure.Structure;
45+
import org.biojava.nbio.structure.io.mmcif.MMCIFFileTools;
46+
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
47+
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
4048
import org.slf4j.Logger;
4149
import org.slf4j.LoggerFactory;
4250

@@ -237,6 +245,8 @@ public String toPDB() {
237245

238246

239247
}
248+
// End any chains with a "TER" record.
249+
if (nrGroups > 0) str.append("TER").append(newline);
240250
}
241251

242252
if ( nrModels>1) {

biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifParser.java

Lines changed: 52 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,63 @@
2121
*/
2222
package org.biojava.nbio.structure.io.mmcif;
2323

24-
import org.biojava.nbio.structure.Structure;
25-
import org.biojava.nbio.structure.io.MMCIFFileReader;
26-
import org.biojava.nbio.structure.io.StructureIOFile;
27-
import org.biojava.nbio.structure.io.mmcif.model.*;
28-
import org.biojava.nbio.structure.jama.Matrix;
29-
import org.slf4j.Logger;
30-
import org.slf4j.LoggerFactory;
31-
32-
import javax.vecmath.Matrix4d;
33-
3424
import java.io.BufferedReader;
3525
import java.io.IOException;
3626
import java.io.InputStream;
3727
import java.io.InputStreamReader;
3828
import java.lang.reflect.InvocationTargetException;
3929
import java.lang.reflect.Method;
40-
import java.util.*;
30+
import java.util.ArrayList;
31+
import java.util.HashMap;
32+
import java.util.HashSet;
33+
import java.util.List;
34+
import java.util.Map;
35+
import java.util.Set;
36+
37+
import javax.vecmath.Matrix4d;
38+
39+
import org.biojava.nbio.structure.Structure;
40+
import org.biojava.nbio.structure.io.MMCIFFileReader;
41+
import org.biojava.nbio.structure.io.StructureIOFile;
42+
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
43+
import org.biojava.nbio.structure.io.mmcif.model.AuditAuthor;
44+
import org.biojava.nbio.structure.io.mmcif.model.Cell;
45+
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
46+
import org.biojava.nbio.structure.io.mmcif.model.ChemCompAtom;
47+
import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond;
48+
import org.biojava.nbio.structure.io.mmcif.model.ChemCompDescriptor;
49+
import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBremark;
50+
import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBrev;
51+
import org.biojava.nbio.structure.io.mmcif.model.DatabasePdbrevRecord;
52+
import org.biojava.nbio.structure.io.mmcif.model.Entity;
53+
import org.biojava.nbio.structure.io.mmcif.model.EntityPolySeq;
54+
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcGen;
55+
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcNat;
56+
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcSyn;
57+
import org.biojava.nbio.structure.io.mmcif.model.Exptl;
58+
import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompDescriptor;
59+
import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompIdentifier;
60+
import org.biojava.nbio.structure.io.mmcif.model.PdbxEntityNonPoly;
61+
import org.biojava.nbio.structure.io.mmcif.model.PdbxNonPolyScheme;
62+
import org.biojava.nbio.structure.io.mmcif.model.PdbxPolySeqScheme;
63+
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly;
64+
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen;
65+
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList;
66+
import org.biojava.nbio.structure.io.mmcif.model.Refine;
67+
import org.biojava.nbio.structure.io.mmcif.model.Struct;
68+
import org.biojava.nbio.structure.io.mmcif.model.StructAsym;
69+
import org.biojava.nbio.structure.io.mmcif.model.StructConn;
70+
import org.biojava.nbio.structure.io.mmcif.model.StructKeywords;
71+
import org.biojava.nbio.structure.io.mmcif.model.StructNcsOper;
72+
import org.biojava.nbio.structure.io.mmcif.model.StructRef;
73+
import org.biojava.nbio.structure.io.mmcif.model.StructRefSeq;
74+
import org.biojava.nbio.structure.io.mmcif.model.StructRefSeqDif;
75+
import org.biojava.nbio.structure.io.mmcif.model.StructSite;
76+
import org.biojava.nbio.structure.io.mmcif.model.StructSiteGen;
77+
import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
78+
import org.biojava.nbio.structure.jama.Matrix;
79+
import org.slf4j.Logger;
80+
import org.slf4j.LoggerFactory;
4181

4282
/**
4383
* A simple mmCif file parser
@@ -170,7 +210,7 @@ public void parse(BufferedReader buf)
170210

171211
// the first line is a data_PDBCODE line, test if this looks like a mmcif file
172212
line = buf.readLine();
173-
if (!line.startsWith(MMCIF_TOP_HEADER)){
213+
if (line == null || !line.startsWith(MMCIF_TOP_HEADER)){
174214
logger.error("This does not look like a valid mmCIF file! The first line should start with 'data_', but is: '" + line+"'");
175215
triggerDocumentEnd();
176216
return;

biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/internal/SequenceFunctionRefiner.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ public static AFPChain refineSymmetry(AFPChain afpChain, Atom[] ca1, Atom[] ca2,
9191
return refinedAFP;
9292
} catch (IndexOutOfBoundsException e){
9393
// This Exception is thrown when the refined alignment is not consistent
94-
throw new RefinerFailedException("Refiner failure", e);
94+
throw new RefinerFailedException("Refiner failure: non-consistent result", e);
9595
}
9696
}
9797

biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/internal/SymmOptimizer.java

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ public class SymmOptimizer {
9292

9393
// Variables that store the history of the optimization - slower if on
9494
private static final boolean history = false;
95-
private static final String pathToHistory = "SymmOptHistory.csv";
95+
private static final int saveStep = 100;
96+
private static final String pathToHistory = "./results/";
9697
private List<Integer> lengthHistory;
9798
private List<Double> rmsdHistory;
9899
private List<Double> scoreHistory;
@@ -165,7 +166,7 @@ private void initialize() throws StructureException, RefinerFailedException {
165166
}
166167
checkGaps();
167168

168-
// Set the MC score and RMSD of the initial state (seed alignment)
169+
// Set the MC score of the initial state (seed alignment)
169170
updateMultipleAlignment();
170171
mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend,
171172
dCutoff);
@@ -267,30 +268,23 @@ public MultipleAlignment optimize() throws StructureException,
267268
+ ", --conv: " + conv);
268269

269270
if (history) {
270-
if (i % 100 == 1) {
271+
if (i % saveStep == 1) {
271272
// Get the correct superposition again
272273
updateMultipleAlignment();
273-
double rmsd = MultipleAlignmentScorer.getRMSD(msa);
274274

275275
lengthHistory.add(length);
276-
rmsdHistory.add(rmsd);
277-
scoreHistory.add(mcScore);
276+
rmsdHistory.add(msa.getScore(MultipleAlignmentScorer.RMSD));
277+
scoreHistory.add(msa.getScore(MultipleAlignmentScorer.AVGTM_SCORE));
278278
}
279279
}
280280

281281
i++;
282282
}
283-
// Superimpose and calculate scores
283+
// Superimpose and calculate final scores
284284
updateMultipleAlignment();
285285
mcScore = MultipleAlignmentScorer.getMCScore(msa, Gopen, Gextend,
286286
dCutoff);
287-
double tmScore = MultipleAlignmentScorer.getAvgTMScore(msa) * order;
288-
double rmsd = MultipleAlignmentScorer.getRMSD(msa);
289-
290-
// Set the scores
291287
msa.putScore(MultipleAlignmentScorer.MC_SCORE, mcScore);
292-
msa.putScore(MultipleAlignmentScorer.AVGTM_SCORE, tmScore);
293-
msa.putScore(MultipleAlignmentScorer.RMSD, rmsd);
294288

295289
// Save the history to the results folder of the symmetry project
296290
if (history) {
@@ -808,14 +802,18 @@ private double probabilityFunction(double AS, int m, int maxIter) {
808802
/**
809803
* Save the evolution of the optimization process as a csv file.
810804
*/
811-
private void saveHistory(String filePath) throws IOException {
805+
private void saveHistory(String folder) throws IOException {
812806

813-
FileWriter writer = new FileWriter(filePath);
814-
writer.append("Step,Length,RMSD,Score\n");
807+
String name = msa.getStructureIdentifier(0).getIdentifier();
808+
FileWriter writer = new FileWriter(folder + name + "-symm_optimization.csv");
809+
writer.append("Structure,Step,Repeat Length,RMSD,TM-Score\n");
815810

816811
for (int i = 0; i < lengthHistory.size(); i++) {
817-
writer.append(i * 100 + "," + lengthHistory.get(i) + ","
818-
+ rmsdHistory.get(i) + "," + scoreHistory.get(i) + "\n");
812+
writer.append(name + ",");
813+
writer.append(i * saveStep + ",");
814+
writer.append(lengthHistory.get(i) + ",");
815+
writer.append(rmsdHistory.get(i) + ",");
816+
writer.append(scoreHistory.get(i) + "\n");
819817
}
820818

821819
writer.flush();

0 commit comments

Comments
 (0)