3939import org .biojava3 .core .sequence .views .WindowedSequence ;
4040
4141/**
42- * Takes a {@link Sequence} of {@link NucleotideCompound} which should
43- * represent an RNA sequence ({@link RNASequence} is good for this) and returns
44- * a list of {@link Sequence} which hold {@link AminoAcidCompound}. The
45- * translator can also trim stop codons as well as changing any valid
46- * start codon to an initiating met.
42+ * Takes a {@link Sequence} of {@link NucleotideCompound} which should represent
43+ * an RNA sequence ({@link RNASequence} is good for this) and returns a list of
44+ * {@link Sequence} which hold {@link AminoAcidCompound}. The translator can
45+ * also trim stop codons as well as changing any valid start codon to an
46+ * initiating met.
4747 *
4848 * @author ayates
4949 */
50- public class RNAToAminoAcidTranslator extends AbstractCompoundTranslator <NucleotideCompound , AminoAcidCompound > {
50+ public class RNAToAminoAcidTranslator extends
51+ AbstractCompoundTranslator <NucleotideCompound , AminoAcidCompound > {
5152
5253 private final boolean trimStops ;
5354 private final boolean initMetOnly ;
5455 private final Map <Table .CaseInsensitiveTriplet , Codon > quickLookup ;
5556 private final Map <AminoAcidCompound , List <Codon >> aminoAcidToCodon ;
56- //Cheeky lookup which uses a hashing value; key is to switch to using this all the time
57+ // Cheeky lookup which uses a hashing value; key is to switch to using this
58+ // all the time
5759 private final Codon [] codonArray = new Codon [64000 ];
5860 private final AminoAcidCompound unknownAminoAcidCompound ;
5961 private final AminoAcidCompound methionineAminoAcidCompound ;
6062 private final boolean translateNCodons ;
6163
64+ // If true, then translation will stop at the first stop codon encountered
65+ // in the reading frame (the stop codon will be included as the last residue
66+ // in the resulting ProteinSequence, unless removed by #trimStops)
67+ private final boolean stopAtStopCodons ;
68+
69+ // If true, then translation will not start until the first start codon
70+ // encountered in the reading frame. The start codon will be included as the
71+ // first residue in the resulting ProteinSequence
72+ private final boolean waitForStartCodon ;
73+
74+ /**
75+ * @deprecated Retained for backwards compatibility; sets
76+ * {@link #stopAtStopCodons} and {@link #waitForStartCodon} to <code>false</code>
77+ */
78+ @ Deprecated
6279 public RNAToAminoAcidTranslator (
6380 SequenceCreatorInterface <AminoAcidCompound > creator ,
64- CompoundSet <NucleotideCompound > nucleotides , CompoundSet <Codon > codons ,
81+ CompoundSet <NucleotideCompound > nucleotides ,
82+ CompoundSet <Codon > codons ,
6583 CompoundSet <AminoAcidCompound > aminoAcids , Table table ,
6684 boolean trimStops , boolean initMetOnly , boolean translateNCodons ) {
6785
@@ -70,7 +88,46 @@ public RNAToAminoAcidTranslator(
7088 this .initMetOnly = initMetOnly ;
7189 this .translateNCodons = translateNCodons ;
7290
73- quickLookup = new HashMap <Table .CaseInsensitiveTriplet , Codon >(codons .getAllCompounds ().size ());
91+ quickLookup = new HashMap <Table .CaseInsensitiveTriplet , Codon >(codons
92+ .getAllCompounds ().size ());
93+ aminoAcidToCodon = new HashMap <AminoAcidCompound , List <Codon >>();
94+
95+ List <Codon > codonList = table .getCodons (nucleotides , aminoAcids );
96+ for (Codon codon : codonList ) {
97+ quickLookup .put (codon .getTriplet (), codon );
98+ codonArray [codon .getTriplet ().intValue ()] = codon ;
99+
100+ List <Codon > codonL = aminoAcidToCodon .get (codon .getAminoAcid ());
101+ if (codonL == null ) {
102+ codonL = new ArrayList <Codon >();
103+ aminoAcidToCodon .put (codon .getAminoAcid (), codonL );
104+ }
105+ codonL .add (codon );
106+
107+ }
108+ unknownAminoAcidCompound = aminoAcids .getCompoundForString ("X" );
109+ methionineAminoAcidCompound = aminoAcids .getCompoundForString ("M" );
110+ // Set to false for backwards compatibility
111+ stopAtStopCodons = false ;
112+ waitForStartCodon = false ;
113+ }
114+
115+ @ Deprecated
116+ public RNAToAminoAcidTranslator (
117+ SequenceCreatorInterface <AminoAcidCompound > creator ,
118+ CompoundSet <NucleotideCompound > nucleotides ,
119+ CompoundSet <Codon > codons ,
120+ CompoundSet <AminoAcidCompound > aminoAcids , Table table ,
121+ boolean trimStops , boolean initMetOnly , boolean translateNCodons ,
122+ boolean stopAtStopCodons ) {
123+
124+ super (creator , nucleotides , aminoAcids );
125+ this .trimStops = trimStops ;
126+ this .initMetOnly = initMetOnly ;
127+ this .translateNCodons = translateNCodons ;
128+
129+ quickLookup = new HashMap <Table .CaseInsensitiveTriplet , Codon >(codons
130+ .getAllCompounds ().size ());
74131 aminoAcidToCodon = new HashMap <AminoAcidCompound , List <Codon >>();
75132
76133 List <Codon > codonList = table .getCodons (nucleotides , aminoAcids );
@@ -79,7 +136,7 @@ public RNAToAminoAcidTranslator(
79136 codonArray [codon .getTriplet ().intValue ()] = codon ;
80137
81138 List <Codon > codonL = aminoAcidToCodon .get (codon .getAminoAcid ());
82- if ( codonL == null ){
139+ if (codonL == null ) {
83140 codonL = new ArrayList <Codon >();
84141 aminoAcidToCodon .put (codon .getAminoAcid (), codonL );
85142 }
@@ -88,12 +145,51 @@ public RNAToAminoAcidTranslator(
88145 }
89146 unknownAminoAcidCompound = aminoAcids .getCompoundForString ("X" );
90147 methionineAminoAcidCompound = aminoAcids .getCompoundForString ("M" );
148+ this .stopAtStopCodons = stopAtStopCodons ;
149+ // Set for backwards compatibility
150+ waitForStartCodon = false ;
151+ }
152+
153+ public RNAToAminoAcidTranslator (
154+ SequenceCreatorInterface <AminoAcidCompound > creator ,
155+ CompoundSet <NucleotideCompound > nucleotides ,
156+ CompoundSet <Codon > codons ,
157+ CompoundSet <AminoAcidCompound > aminoAcids , Table table ,
158+ boolean trimStops , boolean initMetOnly , boolean translateNCodons ,
159+ boolean stopAtStopCodons , boolean waitForStartCodon ) {
160+
161+ super (creator , nucleotides , aminoAcids );
162+ this .trimStops = trimStops ;
163+ this .initMetOnly = initMetOnly ;
164+ this .translateNCodons = translateNCodons ;
165+
166+ quickLookup = new HashMap <Table .CaseInsensitiveTriplet , Codon >(codons
167+ .getAllCompounds ().size ());
168+ aminoAcidToCodon = new HashMap <AminoAcidCompound , List <Codon >>();
169+
170+ List <Codon > codonList = table .getCodons (nucleotides , aminoAcids );
171+ for (Codon codon : codonList ) {
172+ quickLookup .put (codon .getTriplet (), codon );
173+ codonArray [codon .getTriplet ().intValue ()] = codon ;
174+
175+ List <Codon > codonL = aminoAcidToCodon .get (codon .getAminoAcid ());
176+ if (codonL == null ) {
177+ codonL = new ArrayList <Codon >();
178+ aminoAcidToCodon .put (codon .getAminoAcid (), codonL );
179+ }
180+ codonL .add (codon );
181+
182+ }
183+ unknownAminoAcidCompound = aminoAcids .getCompoundForString ("X" );
184+ methionineAminoAcidCompound = aminoAcids .getCompoundForString ("M" );
185+ this .stopAtStopCodons = stopAtStopCodons ;
186+ this .waitForStartCodon = waitForStartCodon ;
91187 }
92188
93189 /**
94- * Performs the core conversion of RNA to Peptide. It does this by walking
95- * a windowed version of the given sequence. Any trailing DNA base pairs
96- * are ignored according to the specification of {@link WindowedSequence}.
190+ * Performs the core conversion of RNA to Peptide. It does this by walking a
191+ * windowed version of the given sequence. Any trailing DNA base pairs are
192+ * ignored according to the specification of {@link WindowedSequence}.
97193 */
98194
99195 @ Override
@@ -102,33 +198,50 @@ public List<Sequence<AminoAcidCompound>> createSequences(
102198
103199 List <List <AminoAcidCompound >> workingList = new ArrayList <List <AminoAcidCompound >>();
104200
105- Iterable <SequenceView <NucleotideCompound >> iter =
106- new WindowedSequence < NucleotideCompound >( originalSequence , 3 );
201+ Iterable <SequenceView <NucleotideCompound >> iter = new WindowedSequence < NucleotideCompound >(
202+ originalSequence , 3 );
107203
108204 boolean first = true ;
109205
206+ // If not waiting for a start codon, start translating immediately
207+ boolean doTranslate = !waitForStartCodon ;
208+
110209 for (SequenceView <NucleotideCompound > element : iter ) {
111210 AminoAcidCompound aminoAcid = null ;
112211
113- int i =1 ;
212+ int i = 1 ;
114213 Table .CaseInsensitiveTriplet triplet = new Table .CaseInsensitiveTriplet (
115- element .getCompoundAt (i ++), element .getCompoundAt (i ++), element .getCompoundAt (i ++));
214+ element .getCompoundAt (i ++), element .getCompoundAt (i ++),
215+ element .getCompoundAt (i ++));
116216
117217 Codon target = null ;
118218
119219 target = quickLookup .get (triplet );
120- if ( target != null )
121- aminoAcid = target . getAminoAcid ();
122- if ( aminoAcid == null && translateNCodons ()) {
123- aminoAcid = unknownAminoAcidCompound ;
220+
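221+ // Check for a start. NOTE(review): target is null when the triplet is absent from quickLookup, so target.isStart() can throw a NullPointerException here; confirm and add a null guard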
222+ if ( doTranslate == false && target . isStart ()) {
223+ doTranslate = true ;
124224 }
125- else {
126- if (first && initMetOnly && target .isStart ()) {
127- aminoAcid = methionineAminoAcidCompound ;
225+
226+ if (doTranslate ) {
227+ if (target != null )
228+ aminoAcid = target .getAminoAcid ();
229+ if (aminoAcid == null && translateNCodons ()) {
230+ aminoAcid = unknownAminoAcidCompound ;
231+ } else {
232+ if (first && initMetOnly && target .isStart ()) {
233+ aminoAcid = methionineAminoAcidCompound ;
234+ }
128235 }
236+
237+ addCompoundsToList (Arrays .asList (aminoAcid ), workingList );
238+ }
239+
240+ if (doTranslate && stopAtStopCodons && target .isStop ()) {
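241+ // Check if we need to stop, but don't stop until started! NOTE(review): target.isStop() is not null-guarded, so an unknown triplet would throw here; confirm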
242+ break ;
129243 }
130244
131- addCompoundsToList (Arrays .asList (aminoAcid ), workingList );
132245 first = false ;
133246 }
134247 postProcessCompoundLists (workingList );
@@ -151,8 +264,8 @@ protected void postProcessCompoundLists(
151264 }
152265
153266 /**
154- * Imperfect code. Checks the last amino acid to see if a codon could
155- * have translated a stop for it. Left in for the moment
267+ * Imperfect code. Checks the last amino acid to see if a codon could have
268+ * translated a stop for it. Left in for the moment
156269 */
157270 protected void trimStop (List <AminoAcidCompound > sequence ) {
158271 AminoAcidCompound stop = sequence .get (sequence .size () - 1 );
@@ -173,8 +286,8 @@ protected void trimStop(List<AminoAcidCompound> sequence) {
173286
174287 /**
175288 * Indicates if we want to force exact translation of compounds or not i.e.
176- * those with internal N RNA bases. This will cause a translation to an
177- * X amino acid
289+ * those with internal N RNA bases. This will cause a translation to an X
290+ * amino acid
178291 */
179292 public boolean translateNCodons () {
180293 return translateNCodons ;
0 commit comments