2222
2323import org .biojava .nbio .core .exceptions .CompoundNotFoundException ;
2424import org .biojava .nbio .core .sequence .DNASequence ;
25+ import org .biojava .nbio .core .sequence .RNASequence ;
2526import org .biojava .nbio .core .sequence .ProteinSequence ;
2627import org .biojava .nbio .core .sequence .compound .AminoAcidCompound ;
2728import org .biojava .nbio .core .sequence .compound .AminoAcidCompoundSet ;
2829import org .biojava .nbio .core .sequence .compound .DNACompoundSet ;
30+ import org .biojava .nbio .core .sequence .compound .RNACompoundSet ;
2931import org .biojava .nbio .core .sequence .compound .NucleotideCompound ;
3032import org .biojava .nbio .core .sequence .features .FeatureInterface ;
3133import org .biojava .nbio .core .sequence .features .Qualifier ;
4042import org .slf4j .LoggerFactory ;
4143
4244import java .io .BufferedInputStream ;
45+ import java .io .BufferedReader ;
4346import java .io .IOException ;
4447import java .io .InputStream ;
48+ import java .io .InputStreamReader ;
4549import java .util .ArrayList ;
4650import java .util .LinkedHashMap ;
4751import java .util .List ;
@@ -230,34 +234,60 @@ public void CDStest() throws Exception {
230234
231235 }
232236
233- private DNASequence readGenbankResource (final String resource ) throws Exception {
234- DNASequence sequence = null ;
235- InputStream inputStream = null ;
236- try {
237- inputStream = getClass ().getResourceAsStream (resource );
238-
239- GenbankReader <DNASequence , NucleotideCompound > genbankDNA
240- = new GenbankReader <>(
241- inputStream ,
242- new GenericGenbankHeaderParser <>(),
243- new DNASequenceCreator (DNACompoundSet .getDNACompoundSet ())
244- );
245- LinkedHashMap <String , DNASequence > dnaSequences = genbankDNA .process ();
246- sequence = dnaSequences .values ().iterator ().next ();
247- }
248- finally {
249- try {
250- inputStream .close ();
251- }
252- catch (Exception e ) {
253- // ignore
254- }
237+ private DNASequence readGenbankResource (final String resource ) throws IOException , CompoundNotFoundException {
238+ InputStream inputStream = getClass ().getResourceAsStream (resource );
239+ GenbankReader <DNASequence , NucleotideCompound > genbankDNA
240+ = new GenbankReader <>(
241+ inputStream ,
242+ new GenericGenbankHeaderParser <>(),
243+ new DNASequenceCreator (DNACompoundSet .getDNACompoundSet ())
244+ );
245+ LinkedHashMap <String , DNASequence > dnaSequences = genbankDNA .process ();
246+ return dnaSequences .values ().iterator ().next ();
247+ }
248+
249+ private RNASequence readGenbankRNAResource (final String resource ) throws IOException , CompoundNotFoundException {
250+ InputStream inputStream = getClass ().getResourceAsStream (resource );
251+ GenbankReader <RNASequence , NucleotideCompound > genbankRNA
252+ = new GenbankReader <>(
253+ inputStream ,
254+ new GenericGenbankHeaderParser <>(),
255+ new RNASequenceCreator (RNACompoundSet .getRNACompoundSet ())
256+ );
257+ LinkedHashMap <String , RNASequence > rnaSequences = genbankRNA .process ();
258+ return rnaSequences .values ().iterator ().next ();
259+ }
260+
261+ private ProteinSequence readGenbankProteinResource (final String resource ) throws IOException , CompoundNotFoundException {
262+ InputStream inputStream = getClass ().getResourceAsStream (resource );
263+ GenbankReader <ProteinSequence , AminoAcidCompound > genbankProtein
264+ = new GenbankReader <>(
265+ inputStream ,
266+ new GenericGenbankHeaderParser <>(),
267+ new ProteinSequenceCreator (AminoAcidCompoundSet .getAminoAcidCompoundSet ())
268+ );
269+ LinkedHashMap <String , ProteinSequence > proteinSequences = genbankProtein .process ();
270+ return proteinSequences .values ().iterator ().next ();
271+ }
272+
273+ private AbstractSequence <?> readUnknownGenbankResource (final String resource ) throws IOException , CompoundNotFoundException {
274+ InputStream inputStream = getClass ().getResourceAsStream (resource );
275+ GenbankSequenceParser genbankParser = new GenbankSequenceParser ();
276+ BufferedReader bufferedReader = new BufferedReader (new InputStreamReader (inputStream ));
277+ String seqString = genbankParser .getSequence (bufferedReader , 0 );
278+ String compoundSet = genbankParser .getCompoundType ().getClass ().getSimpleName ();
279+
280+ if (compoundSet .equals ("AminoAcidCompoundSet" )) {
281+ return readGenbankProteinResource (resource );
282+ } else if (compoundSet .equals ("RNACompoundSet" )) {
283+ return readGenbankRNAResource (resource );
284+ } else {
285+ return readGenbankResource (resource );
255286 }
256- return sequence ;
257287 }
258288
259289 @ Test
260- public void testNcbiExpandedAccessionFormats () throws Exception {
290+ public void testNcbiExpandedAccessionFormats () throws IOException , CompoundNotFoundException {
261291 DNASequence header0 = readGenbankResource ("/empty_header0.gb" );
262292 assertEquals ("CP032762 5868661 bp DNA circular BCT 15-OCT-2018" , header0 .getOriginalHeader ());
263293
@@ -267,6 +297,58 @@ public void testNcbiExpandedAccessionFormats() throws Exception {
267297 DNASequence header2 = readGenbankResource ("/empty_header2.gb" );
268298 assertEquals ("AZZZAA02123456789 10000000000 bp DNA linear PRI 15-OCT-2018" , header2 .getOriginalHeader ());
269299 }
300+
301+ @ Test
302+ public void testLegacyLocusCompatable () throws IOException , CompoundNotFoundException {
303+
304+ // Testing opening a genbank file with uppercase units, strand and topology
305+ AbstractSequence header0 = readUnknownGenbankResource ("/org/biojava/nbio/core/sequence/io/uppercase_locus0.gb" );
306+ assertEquals ("ABC12.3_DE 7071 BP DS-DNA CIRCULAR SYN 22-JUL-1994" , header0 .getOriginalHeader ());
307+ assertEquals ("ABC12.3_DE" , header0 .getAccession ().getID ());
308+ assertEquals ("DNACompoundSet" , header0 .getCompoundSet ().getClass ().getSimpleName ());
309+
310+ // Testing uppercase SS strand
311+ AbstractSequence header1 = readUnknownGenbankResource ("/org/biojava/nbio/core/sequence/io//uppercase_locus1.gb" );
312+ assertEquals ("ABC12.3_DE 7071 BP SS-DNA CIRCULAR SYN 13-JUL-1994" , header1 .getOriginalHeader ());
313+ assertEquals ("ABC12.3_DE" , header1 .getAccession ().getID ());
314+ assertEquals ("DNACompoundSet" , header0 .getCompoundSet ().getClass ().getSimpleName ());
315+
316+ // Testing uppercase MS strand
317+ AbstractSequence header2 = readUnknownGenbankResource ("/org/biojava/nbio/core/sequence/io//uppercase_locus2.gb" );
318+ assertEquals ("ABC12.3_DE 7071 BP MS-DNA CIRCULAR SYN 13-JUL-1994" , header2 .getOriginalHeader ());
319+ assertEquals ("ABC12.3_DE" , header2 .getAccession ().getID ());
320+ assertEquals ("DNACompoundSet" , header0 .getCompoundSet ().getClass ().getSimpleName ());
321+
322+ // Testing uppercase LINEAR topology
323+ AbstractSequence header3 = readUnknownGenbankResource ("/org/biojava/nbio/core/sequence/io//uppercase_locus3.gb" );
324+ assertEquals ("ABC12.3_DE 7071 BP DNA LINEAR SYN 22-JUL-1994" , header3 .getOriginalHeader ());
325+ assertEquals ("ABC12.3_DE" , header3 .getAccession ().getID ());
326+ assertEquals ("DNACompoundSet" , header0 .getCompoundSet ().getClass ().getSimpleName ());
327+
328+ // Testing uppercase units with no strand or topology
329+ AbstractSequence header4 = readUnknownGenbankResource ("/org/biojava/nbio/core/sequence/io//uppercase_locus4.gb" );
330+ assertEquals ("ABC12.3_DE 7071 BP RNA SYN 13-JUL-1994" , header4 .getOriginalHeader ());
331+ assertEquals ("ABC12.3_DE" , header4 .getAccession ().getID ());
332+ assertEquals ("RNACompoundSet" , header4 .getCompoundSet ().getClass ().getSimpleName ());
333+
334+ // Testing uppercase units with no strand, topology, division or date
335+ AbstractSequence header5 = readUnknownGenbankResource ("/org/biojava/nbio/core/sequence/io//uppercase_locus5.gb" );
336+ assertEquals ("ABC12.3_DE 7071 BP DNA" , header5 .getOriginalHeader ());
337+ assertEquals ("ABC12.3_DE" , header5 .getAccession ().getID ());
338+
339+ // Testing uppercase units with no strand, molecule type, topology, division or date
340+ AbstractSequence header6 = readUnknownGenbankResource ("/org/biojava/nbio/core/sequence/io//uppercase_locus6.gb" );
341+ assertEquals ("ABC12.3_DE 7071 BP" , header6 .getOriginalHeader ());
342+ assertEquals ("ABC12.3_DE" , header6 .getAccession ().getID ());
343+ assertEquals ("DNACompoundSet" , header0 .getCompoundSet ().getClass ().getSimpleName ());
344+
345+ // Testing uppercase protein units
346+ AbstractSequence header7 = readUnknownGenbankResource ("/org/biojava/nbio/core/sequence/io//uppercase_locus7.gb" );
347+ assertEquals ("ABC12.3_DE 7071 AA Protein" , header7 .getOriginalHeader ());
348+ assertEquals ("ABC12.3_DE" , header7 .getAccession ().getID ());
349+ assertEquals ("AminoAcidCompoundSet" , header7 .getCompoundSet ().getClass ().getSimpleName ());
350+
351+ }
270352
271353 /**
272354 * Helper class to be able to verify the closed state of the input stream.
0 commit comments