Skip to content

Commit a9e64c4

Browse files
committed
2 parents dc53c00 + 4311f6c commit a9e64c4

11 files changed

Lines changed: 612 additions & 432 deletions

File tree

biojava3-core/src/main/java/org/biojava3/core/sequence/transcription/RNAToAminoAcidTranslator.java

Lines changed: 142 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -39,29 +39,47 @@
3939
import org.biojava3.core.sequence.views.WindowedSequence;
4040

4141
/**
42-
* Takes a {@link Sequence} of {@link NucleotideCompound} which should
43-
* represent an RNA sequence ({@link RNASequence} is good for this) and returns
44-
* a list of {@link Sequence} which hold {@link AminoAcidCompound}. The
45-
* translator can also trim stop codons as well as changing any valid
46-
* start codon to an initiating met.
42+
* Takes a {@link Sequence} of {@link NucleotideCompound} which should represent
43+
* an RNA sequence ({@link RNASequence} is good for this) and returns a list of
44+
* {@link Sequence} which hold {@link AminoAcidCompound}. The translator can
45+
* also trim stop codons as well as changing any valid start codon to an
46+
* initiating met.
4747
*
4848
* @author ayates
4949
*/
50-
public class RNAToAminoAcidTranslator extends AbstractCompoundTranslator<NucleotideCompound, AminoAcidCompound> {
50+
public class RNAToAminoAcidTranslator extends
51+
AbstractCompoundTranslator<NucleotideCompound, AminoAcidCompound> {
5152

5253
private final boolean trimStops;
5354
private final boolean initMetOnly;
5455
private final Map<Table.CaseInsensitiveTriplet, Codon> quickLookup;
5556
private final Map<AminoAcidCompound, List<Codon>> aminoAcidToCodon;
56-
//Cheeky lookup which uses a hashing value; key is to switch to using this all the time
57+
// Cheeky lookup which uses a hashing value; key is to switch to using this
58+
// all the time
5759
private final Codon[] codonArray = new Codon[64000];
5860
private final AminoAcidCompound unknownAminoAcidCompound;
5961
private final AminoAcidCompound methionineAminoAcidCompound;
6062
private final boolean translateNCodons;
6163

64+
// If true, then translation will stop at the first stop codon encountered
65+
// in the reading frame (the stop codon will be included as the last residue
66+
// in the resulting ProteinSequence, unless removed by #trimStops)
67+
private final boolean stopAtStopCodons;
68+
69+
// If true, then translation will not start until the first start codon
70+
// encountered in the reading frame. The start codon will be included as the
71+
// first residue in the resulting ProteinSequence
72+
private final boolean waitForStartCodon;
73+
74+
/**
75+
* @deprecated Retained for backwards compatibility, setting
76+
* {@link #stopAtStopCodons} to <code>false</code>
77+
*/
78+
@Deprecated
6279
public RNAToAminoAcidTranslator(
6380
SequenceCreatorInterface<AminoAcidCompound> creator,
64-
CompoundSet<NucleotideCompound> nucleotides, CompoundSet<Codon> codons,
81+
CompoundSet<NucleotideCompound> nucleotides,
82+
CompoundSet<Codon> codons,
6583
CompoundSet<AminoAcidCompound> aminoAcids, Table table,
6684
boolean trimStops, boolean initMetOnly, boolean translateNCodons) {
6785

@@ -70,7 +88,46 @@ public RNAToAminoAcidTranslator(
7088
this.initMetOnly = initMetOnly;
7189
this.translateNCodons = translateNCodons;
7290

73-
quickLookup = new HashMap<Table.CaseInsensitiveTriplet, Codon>(codons.getAllCompounds().size());
91+
quickLookup = new HashMap<Table.CaseInsensitiveTriplet, Codon>(codons
92+
.getAllCompounds().size());
93+
aminoAcidToCodon = new HashMap<AminoAcidCompound, List<Codon>>();
94+
95+
List<Codon> codonList = table.getCodons(nucleotides, aminoAcids);
96+
for (Codon codon : codonList) {
97+
quickLookup.put(codon.getTriplet(), codon);
98+
codonArray[codon.getTriplet().intValue()] = codon;
99+
100+
List<Codon> codonL = aminoAcidToCodon.get(codon.getAminoAcid());
101+
if (codonL == null) {
102+
codonL = new ArrayList<Codon>();
103+
aminoAcidToCodon.put(codon.getAminoAcid(), codonL);
104+
}
105+
codonL.add(codon);
106+
107+
}
108+
unknownAminoAcidCompound = aminoAcids.getCompoundForString("X");
109+
methionineAminoAcidCompound = aminoAcids.getCompoundForString("M");
110+
// Set to false for backwards compatibility
111+
stopAtStopCodons = false;
112+
waitForStartCodon = false;
113+
}
114+
115+
@Deprecated
116+
public RNAToAminoAcidTranslator(
117+
SequenceCreatorInterface<AminoAcidCompound> creator,
118+
CompoundSet<NucleotideCompound> nucleotides,
119+
CompoundSet<Codon> codons,
120+
CompoundSet<AminoAcidCompound> aminoAcids, Table table,
121+
boolean trimStops, boolean initMetOnly, boolean translateNCodons,
122+
boolean stopAtStopCodons) {
123+
124+
super(creator, nucleotides, aminoAcids);
125+
this.trimStops = trimStops;
126+
this.initMetOnly = initMetOnly;
127+
this.translateNCodons = translateNCodons;
128+
129+
quickLookup = new HashMap<Table.CaseInsensitiveTriplet, Codon>(codons
130+
.getAllCompounds().size());
74131
aminoAcidToCodon = new HashMap<AminoAcidCompound, List<Codon>>();
75132

76133
List<Codon> codonList = table.getCodons(nucleotides, aminoAcids);
@@ -79,7 +136,7 @@ public RNAToAminoAcidTranslator(
79136
codonArray[codon.getTriplet().intValue()] = codon;
80137

81138
List<Codon> codonL = aminoAcidToCodon.get(codon.getAminoAcid());
82-
if ( codonL == null){
139+
if (codonL == null) {
83140
codonL = new ArrayList<Codon>();
84141
aminoAcidToCodon.put(codon.getAminoAcid(), codonL);
85142
}
@@ -88,12 +145,51 @@ public RNAToAminoAcidTranslator(
88145
}
89146
unknownAminoAcidCompound = aminoAcids.getCompoundForString("X");
90147
methionineAminoAcidCompound = aminoAcids.getCompoundForString("M");
148+
this.stopAtStopCodons = stopAtStopCodons;
149+
// Set for backwards compatibility
150+
waitForStartCodon = false;
151+
}
152+
153+
public RNAToAminoAcidTranslator(
154+
SequenceCreatorInterface<AminoAcidCompound> creator,
155+
CompoundSet<NucleotideCompound> nucleotides,
156+
CompoundSet<Codon> codons,
157+
CompoundSet<AminoAcidCompound> aminoAcids, Table table,
158+
boolean trimStops, boolean initMetOnly, boolean translateNCodons,
159+
boolean stopAtStopCodons, boolean waitForStartCodon) {
160+
161+
super(creator, nucleotides, aminoAcids);
162+
this.trimStops = trimStops;
163+
this.initMetOnly = initMetOnly;
164+
this.translateNCodons = translateNCodons;
165+
166+
quickLookup = new HashMap<Table.CaseInsensitiveTriplet, Codon>(codons
167+
.getAllCompounds().size());
168+
aminoAcidToCodon = new HashMap<AminoAcidCompound, List<Codon>>();
169+
170+
List<Codon> codonList = table.getCodons(nucleotides, aminoAcids);
171+
for (Codon codon : codonList) {
172+
quickLookup.put(codon.getTriplet(), codon);
173+
codonArray[codon.getTriplet().intValue()] = codon;
174+
175+
List<Codon> codonL = aminoAcidToCodon.get(codon.getAminoAcid());
176+
if (codonL == null) {
177+
codonL = new ArrayList<Codon>();
178+
aminoAcidToCodon.put(codon.getAminoAcid(), codonL);
179+
}
180+
codonL.add(codon);
181+
182+
}
183+
unknownAminoAcidCompound = aminoAcids.getCompoundForString("X");
184+
methionineAminoAcidCompound = aminoAcids.getCompoundForString("M");
185+
this.stopAtStopCodons = stopAtStopCodons;
186+
this.waitForStartCodon = waitForStartCodon;
91187
}
92188

93189
/**
94-
* Performs the core conversion of RNA to Peptide. It does this by walking
95-
* a windowed version of the given sequence. Any trailing DNA base pairs
96-
* are ignored according to the specification of {@link WindowedSequence}.
190+
* Performs the core conversion of RNA to Peptide. It does this by walking a
191+
* windowed version of the given sequence. Any trailing DNA base pairs are
192+
* ignored according to the specification of {@link WindowedSequence}.
97193
*/
98194

99195
@Override
@@ -102,33 +198,50 @@ public List<Sequence<AminoAcidCompound>> createSequences(
102198

103199
List<List<AminoAcidCompound>> workingList = new ArrayList<List<AminoAcidCompound>>();
104200

105-
Iterable<SequenceView<NucleotideCompound>> iter =
106-
new WindowedSequence<NucleotideCompound>(originalSequence, 3);
201+
Iterable<SequenceView<NucleotideCompound>> iter = new WindowedSequence<NucleotideCompound>(
202+
originalSequence, 3);
107203

108204
boolean first = true;
109205

206+
// If not waiting for a start codon, start translating immediately
207+
boolean doTranslate = !waitForStartCodon;
208+
110209
for (SequenceView<NucleotideCompound> element : iter) {
111210
AminoAcidCompound aminoAcid = null;
112211

113-
int i =1;
212+
int i = 1;
114213
Table.CaseInsensitiveTriplet triplet = new Table.CaseInsensitiveTriplet(
115-
element.getCompoundAt(i++), element.getCompoundAt(i++), element.getCompoundAt(i++));
214+
element.getCompoundAt(i++), element.getCompoundAt(i++),
215+
element.getCompoundAt(i++));
116216

117217
Codon target = null;
118218

119219
target = quickLookup.get(triplet);
120-
if ( target != null)
121-
aminoAcid = target.getAminoAcid();
122-
if(aminoAcid == null && translateNCodons()) {
123-
aminoAcid = unknownAminoAcidCompound;
220+
221+
// Check for a start
222+
if (doTranslate == false && target.isStart()) {
223+
doTranslate = true;
124224
}
125-
else {
126-
if(first && initMetOnly && target.isStart()) {
127-
aminoAcid = methionineAminoAcidCompound;
225+
226+
if (doTranslate) {
227+
if (target != null)
228+
aminoAcid = target.getAminoAcid();
229+
if (aminoAcid == null && translateNCodons()) {
230+
aminoAcid = unknownAminoAcidCompound;
231+
} else {
232+
if (first && initMetOnly && target.isStart()) {
233+
aminoAcid = methionineAminoAcidCompound;
234+
}
128235
}
236+
237+
addCompoundsToList(Arrays.asList(aminoAcid), workingList);
238+
}
239+
240+
if (doTranslate && stopAtStopCodons && target.isStop()) {
241+
// Check if we need to stop, but don't stop until started!
242+
break;
129243
}
130244

131-
addCompoundsToList(Arrays.asList(aminoAcid), workingList);
132245
first = false;
133246
}
134247
postProcessCompoundLists(workingList);
@@ -151,8 +264,8 @@ protected void postProcessCompoundLists(
151264
}
152265

153266
/**
154-
* Imperfect code. Checks the last amino acid to see if a codon could
155-
* have translated a stop for it. Left in for the moment
267+
* Imperfect code. Checks the last amino acid to see if a codon could have
268+
* translated a stop for it. Left in for the moment
156269
*/
157270
protected void trimStop(List<AminoAcidCompound> sequence) {
158271
AminoAcidCompound stop = sequence.get(sequence.size() - 1);
@@ -173,8 +286,8 @@ protected void trimStop(List<AminoAcidCompound> sequence) {
173286

174287
/**
175288
* Indicates if we want to force exact translation of compounds or not i.e.
176-
* those with internal N RNA bases. This will cause a translation to an
177-
* X amino acid
289+
* those with internal N RNA bases. This will cause a translation to an X
290+
* amino acid
178291
*/
179292
public boolean translateNCodons() {
180293
return translateNCodons;

0 commit comments

Comments
 (0)