|
| 1 | +Reading and writing a Genbank file |
| 2 | +================================== |
| 3 | + |
| 4 | +There are multiple ways to read a Genbank file. |
| 5 | + |
| 6 | +## Method 1: Read a Genbank file using the GenbankProxySequenceReader |
| 7 | + |
| 8 | +```java |
| 9 | + |
| 10 | + GenbankProxySequenceReader<AminoAcidCompound> genbankProteinReader |
| 11 | + = new GenbankProxySequenceReader<AminoAcidCompound>("/tmp", "NP_000257", AminoAcidCompoundSet.getAminoAcidCompoundSet()); |
| 12 | + ProteinSequence proteinSequence = new ProteinSequence(genbankProteinReader); |
| 13 | + genbankProteinReader.getHeaderParser().parseHeader(genbankProteinReader.getHeader(), proteinSequence); |
| 14 | + System.out.println("Sequence" + "(" + proteinSequence.getAccession() + "," + proteinSequence.getLength() + ")=" + |
| 15 | +proteinSequence.getSequenceAsString().substring(0, 10) + "..."); |
| 16 | + |
| 17 | + GenbankProxySequenceReader<NucleotideCompound> genbankDNAReader |
| 18 | + = new GenbankProxySequenceReader<NucleotideCompound>("/tmp", "NM_001126", DNACompoundSet.getDNACompoundSet()); |
| 19 | + DNASequence dnaSequence = new DNASequence(genbankDNAReader); |
| 20 | + genbankDNAReader.getHeaderParser().parseHeader(genbankDNAReader.getHeader(), dnaSequence); |
| 21 | + System.out.println("Sequence" + "(" + dnaSequence.getAccession() + "," + dnaSequence.getLength() + ")=" + |
| 22 | +dnaSequence.getSequenceAsString().substring(0, 10) + "..."); |
| 23 | + |
| 24 | +``` |
| 25 | + |
| 26 | + |
| 27 | +## Method 2: Read a Genbank file using GenbankReaderHelper |
| 28 | + |
| 29 | +```java |
| 30 | + File dnaFile = new File("src/test/resources/NM_000266.gb"); |
| 31 | + File protFile = new File("src/test/resources/BondFeature.gb"); |
| 32 | + |
| 33 | + LinkedHashMap<String, DNASequence> dnaSequences = GenbankReaderHelper.readGenbankDNASequence( dnaFile ); |
| 34 | + for (DNASequence sequence : dnaSequences.values()) { |
| 35 | + System.out.println( sequence.getSequenceAsString() ); |
| 36 | + } |
| 37 | + |
| 38 | + LinkedHashMap<String, ProteinSequence> protSequences = GenbankReaderHelper.readGenbankProteinSequence(protFile); |
| 39 | + for (ProteinSequence sequence : protSequences.values()) { |
| 40 | + System.out.println( sequence.getSequenceAsString() ); |
| 41 | + } |
| 42 | + |
| 43 | +``` |
| 44 | + |
| 45 | +## Method 3: Read a Genbank file using the GenbankReader Object |
| 46 | + |
| 47 | +```java |
| 48 | + |
| 49 | + FileInputStream is = new FileInputStream(dnaFile); |
| 50 | + GenbankReader<DNASequence, NucleotideCompound> dnaReader = new GenbankReader<DNASequence, NucleotideCompound>( |
| 51 | + is, |
| 52 | + new GenericGenbankHeaderParser<DNASequence,NucleotideCompound>(), |
| 53 | + new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()) |
| 54 | + ); |
| 55 | + dnaSequences = dnaReader.process(); |
| 56 | + is.close(); |
| 57 | + System.out.println(dnaSequences); |
| 58 | + |
| 59 | + is = new FileInputStream(protFile); |
| 60 | + GenbankReader<ProteinSequence, AminoAcidCompound> protReader = new GenbankReader<ProteinSequence, AminoAcidCompound>( |
| 61 | + is, |
| 62 | + new GenericGenbankHeaderParser<ProteinSequence,AminoAcidCompound>(), |
| 63 | + new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()) |
| 64 | + ); |
| 65 | + protSequences = protReader.process(); |
| 66 | + is.close(); |
| 67 | + System.out.println(protSequences); |
| 68 | + |
| 69 | + ``` |
| 70 | + |
| 71 | + |
| 72 | +# Write a Genbank file |
| 73 | + |
| 74 | + |
| 75 | +Use the GenbankWriterHelper to write DNA sequences into a Genbank file. |
| 76 | + |
| 77 | +```java |
| 78 | + |
| 79 | + // First let's read some DNA sequences from a genbank file |
| 80 | + |
| 81 | + File dnaFile = new File("src/test/resources/NM_000266.gb"); |
| 82 | + LinkedHashMap<String, DNASequence> dnaSequences = GenbankReaderHelper.readGenbankDNASequence( dnaFile ); |
| 83 | + ByteArrayOutputStream fragwriter = new ByteArrayOutputStream(); |
| 84 | + ArrayList<DNASequence> seqs = new ArrayList<DNASequence>(); |
| 85 | + for(DNASequence seq : dnaSequences.values()) { |
| 86 | + seqs.add(seq); |
| 87 | + } |
| 88 | + |
| 89 | + // OK, now we have some DNA sequence data. The next step is to write it |
| 90 | + |
| 91 | + GenbankWriterHelper.writeNucleotideSequence(fragwriter, seqs, |
| 92 | + GenbankWriterHelper.LINEAR_DNA); |
| 93 | + |
| 94 | + // the fragwriter object now contains a string representation in the Genbank format |
| 95 | + // and you could write this into a file |
| 96 | + // or print it out on the console |
| 97 | + System.out.println(fragwriter.toString()); |
| 98 | + |
| 99 | +``` |
0 commit comments