Skip to content

Commit 3a06c1e

Browse files
authored
Merge pull request #868 from josemduarte/mmcif-branched-polymers
Minimal read support for files with 'branched' entities
2 parents 36073a8 + ab76f92 commit 3a06c1e

5 files changed

Lines changed: 101 additions & 11 deletions

File tree

biojava-structure/src/main/java/org/biojava/nbio/structure/EntityType.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,18 @@ public enum EntityType {
4444
*/
4545
POLYMER("polymer"),
4646

47+
/**
48+
* The 'branched' type is used mainly to represent carbohydrates.
49+
* The type was introduced in these versions of the mmcif dictionary:
50+
* 5.101 2012-08-22
51+
* 5.291 2017-09-10
52+
* 5.304 2018-08-01
53+
* The type will only be used for PDB-deposited files from July 2020, as part of
54+
* the carbohydrate remediation project.
55+
* @since 5.4.0
56+
*/
57+
BRANCHED("branched"),
58+
4759
/**
4860
* Non-polymeric entities: ligands, metal ions, buffer molecules, etc
4961
*/

biojava-structure/src/main/java/org/biojava/nbio/structure/Model.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,10 @@ public void addChain(Chain c) {
110110
logger.warn("Chain with asym id {} (author id {}) has entity type 'macrolide', considering it non-polymeric", c.getId(), c.getName());
111111
nonPolyChains.add(c);
112112

113+
} else if (info.getType() == EntityType.BRANCHED) {
114+
logger.warn("Chain with asym id {} (author id {}) has entity type 'branched', considering it non-polymeric", c.getId(), c.getName());
115+
nonPolyChains.add(c);
116+
113117
} else {
114118
logger.warn("Chain with asym id {} (author id {}) has unsupported entity type '{}'. Will not add it to the Structure.", c.getId(), c.getName(), info.getType().toString());
115119
// ignore it

biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmtf/MmtfStructureWriter.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
3838
import org.rcsb.mmtf.api.StructureAdapterInterface;
3939
import org.rcsb.mmtf.dataholders.MmtfStructure;
40+
import org.slf4j.Logger;
41+
import org.slf4j.LoggerFactory;
4042

4143
/**
4244
* Class to take Biojava structure data and convert it to the DataApi for encoding.
@@ -48,7 +50,9 @@
4850
*/
4951
public class MmtfStructureWriter {
5052

51-
private StructureAdapterInterface mmtfDecoderInterface;
53+
private static final Logger logger = LoggerFactory.getLogger(MmtfStructureWriter.class);
54+
55+
private final StructureAdapterInterface mmtfDecoderInterface;
5256

5357
/**
5458
* Pass data from Biojava structure to another generic output type. Loops through the data
@@ -179,9 +183,8 @@ private void storeEntityInformation(List<Chain> allChains, List<EntityInfo> enti
179183
List<Chain> entityChains = entityInfo.getChains();
180184
if (entityChains.isEmpty()){
181185
// Error mapping chain to entity
182-
System.err.println("ERROR MAPPING CHAIN TO ENTITY: "+description);
186+
logger.error("ERROR MAPPING CHAIN TO ENTITY: "+description);
183187
mmtfDecoderInterface.setEntityInfo(new int[0], "", description, type);
184-
continue;
185188
}
186189
else{
187190
int[] chainIndices = new int[entityChains.size()];
@@ -194,7 +197,7 @@ private void storeEntityInformation(List<Chain> allChains, List<EntityInfo> enti
194197
chainImpl = (ChainImpl) entityChains.get(0);
195198
}
196199
else{
197-
throw new RuntimeException();
200+
throw new RuntimeException("Encountered Chain of unexpected type");
198201
}
199202
String sequence = chainImpl.getSeqResOneLetterSeq();
200203
mmtfDecoderInterface.setEntityInfo(chainIndices, sequence, description, type);
@@ -205,8 +208,7 @@ private void storeEntityInformation(List<Chain> allChains, List<EntityInfo> enti
205208

206209
/**
207210
* Generate the bioassembly information in the desired form.
208-
* @param bioJavaStruct the Biojava structure
209-
* @param header the header
211+
*
210212
*/
211213
private void storeBioassemblyInformation(Map<String, Integer> chainIdToIndexMap, Map<Integer, BioAssemblyInfo> inputBioAss) {
212214
int bioAssemblyIndex = 0;

biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestNonDepositedFiles.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.io.IOException;
2828
import java.io.InputStream;
2929
import java.io.InputStreamReader;
30+
import java.net.URL;
3031
import java.util.List;
3132
import java.util.zip.GZIPInputStream;
3233

@@ -443,4 +444,35 @@ private static int[] countEntityTypes(List<EntityInfo> entities) {
443444
return counts;
444445

445446
}
447+
448+
@Test
449+
public void testStructureWithBranchedEntities() throws IOException {
450+
// Example carbohydrate remediation file to be released in July 2020
451+
URL url = new URL("https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/examples/models/1B5F-carb.cif");
452+
InputStream inStream = url.openStream();
453+
454+
MMcifParser parser = new SimpleMMcifParser();
455+
456+
SimpleMMcifConsumer consumer = new SimpleMMcifConsumer();
457+
parser.addMMcifConsumer(consumer);
458+
parser.parse(new BufferedReader(new InputStreamReader(inStream)));
459+
460+
Structure structure = consumer.getStructure();
461+
462+
assertEquals(7, structure.getEntityInfos().size());
463+
464+
assertEquals(2, structure.getEntityById(1).getChains().size());
465+
assertEquals(2, structure.getEntityById(2).getChains().size());
466+
467+
// we consider the branched chains non-poly chains
468+
assertEquals(4, structure.getNonPolyChains().size());
469+
assertEquals(4, structure.getPolyChains().size());
470+
471+
assertEquals(1, structure.getEntityById(3).getChains().size());
472+
473+
// chain asym_id="E" is from entity 3
474+
assertSame(structure.getNonPolyChain("E"), structure.getEntityById(3).getChains().get(0));
475+
476+
assertEquals(5, structure.getNonPolyChain("E").getAtomGroups().size());
477+
}
446478
}

biojava-structure/src/test/java/org/biojava/nbio/structure/io/mmtf/TestMmtfRoundTrip.java

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,11 @@
2020
*/
2121
package org.biojava.nbio.structure.io.mmtf;
2222

23-
import static org.junit.Assert.assertArrayEquals;
24-
import static org.junit.Assert.assertEquals;
25-
import static org.junit.Assert.assertNotNull;
26-
import static org.junit.Assert.assertTrue;
27-
23+
import java.io.BufferedReader;
2824
import java.io.IOException;
25+
import java.io.InputStream;
26+
import java.io.InputStreamReader;
27+
import java.net.URL;
2928
import java.util.ArrayList;
3029
import java.util.Collections;
3130
import java.util.Comparator;
@@ -43,12 +42,17 @@
4342
import org.biojava.nbio.structure.io.FileParsingParameters;
4443
import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
4544
import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider;
45+
import org.biojava.nbio.structure.io.mmcif.MMcifParser;
46+
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifConsumer;
47+
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
4648
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
4749
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
4850
import org.junit.Test;
4951
import org.rcsb.mmtf.decoder.StructureDataToAdapter;
5052
import org.rcsb.mmtf.encoder.AdapterToStructureData;
5153

54+
import static org.junit.Assert.*;
55+
5256
/**
5357
* Tests to see if roundtripping of MMTF can be done.
5458
*
@@ -350,4 +354,40 @@ private void checkBioAssemblies1(Structure structOne, Structure structTwo) throw
350354
}
351355
}
352356
}
357+
358+
@Test
359+
public void testStructWithBranchedEntitiesRoundTrip() throws IOException {
360+
// Example carbohydrate remediation file to be released in July 2020
361+
URL url = new URL("https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/examples/models/1B5F-carb.cif");
362+
InputStream inStream = url.openStream();
363+
364+
MMcifParser parser = new SimpleMMcifParser();
365+
366+
SimpleMMcifConsumer consumer = new SimpleMMcifConsumer();
367+
parser.addMMcifConsumer(consumer);
368+
parser.parse(new BufferedReader(new InputStreamReader(inStream)));
369+
370+
Structure structure = consumer.getStructure();
371+
372+
AdapterToStructureData writerToEncoder = new AdapterToStructureData();
373+
new MmtfStructureWriter(structure, writerToEncoder);
374+
MmtfStructureReader mmtfStructureReader = new MmtfStructureReader();
375+
new StructureDataToAdapter(writerToEncoder, mmtfStructureReader);
376+
Structure structure2 = mmtfStructureReader.getStructure();
377+
378+
assertEquals(7, structure2.getEntityInfos().size());
379+
380+
assertEquals(2, structure2.getEntityById(1).getChains().size());
381+
assertEquals(2, structure2.getEntityById(2).getChains().size());
382+
383+
assertEquals(4, structure2.getNonPolyChains().size());
384+
assertEquals(4, structure2.getPolyChains().size());
385+
386+
assertEquals(1, structure2.getEntityById(3).getChains().size());
387+
388+
// chain asym_id="E" is from entity 3
389+
assertSame(structure2.getNonPolyChain("E"), structure2.getEntityById(3).getChains().get(0));
390+
391+
assertEquals(5, structure2.getNonPolyChain("E").getAtomGroups().size());
392+
}
353393
}

0 commit comments

Comments
 (0)