Skip to content

Commit ae78c36

Browse files
committed
Implemented the APIs for generation of PROFEAT properties. Also implemented adaptor methods for ease of using the APIs.
git-svn-id: http://code.open-bio.org/repos/biojava/biojava-live/trunk@8997 7c6358e6-4a41-0410-a743-a5b2a554c398
1 parent e455d2f commit ae78c36

8 files changed

Lines changed: 414 additions & 124 deletions

File tree

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package org.biojava3.aaproperties;
2+
3+
import java.util.Map;
4+
5+
import org.biojava3.core.sequence.ProteinSequence;
6+
7+
public interface IProfeatProperties {
8+
/**
9+
* Based on Table 2 of http://nar.oxfordjournals.org/content/34/suppl_2/W32.full.pdf<br/>
10+
* An interface class to generate the properties of a protein sequence based on its converted attributes.<br/>
11+
* The seven different attributes are<p/>
12+
* Hydrophobicity (Polar, Neutral, Hydrophobicity)<br/>
13+
* Normalized van der Waals volume (Range 0 - 2.78, 2.95 - 4.0, 4.03 - 8.08)<br/>
14+
* Polarity (Value 4.9 - 6.2, 8.0 - 9.2, 10.4 - 13.0)<br/>
15+
* Polarizability (Value 0 - 1.08, 0.128 - 0.186, 0.219 - 0.409)<br/>
16+
* Charge (Positive, Neutral, Negative)<br/>
17+
* Secondary structure (Helix, Strand, Coil)<br/>
18+
* Solvent accessibility (Buried, Exposed, Intermediate)<br/>
19+
*
20+
* @author kohchuanhock
21+
* @version 2011.06.16
22+
*
23+
*/
24+
25+
/**
26+
* Enumeration of the seven different attributes
27+
*/
28+
public enum ATTRIBUTE {HYDROPHOBICITY, VOLUME, POLARITY, POLARIZABILITY, CHARGE, SECONDARYSTRUCTURE, SOLVENTACCESSIBILITY};
29+
/**
30+
* Enumeration of the three different groupings for each attributes
31+
*/
32+
public enum GROUPING {GROUP1, GROUP2, GROUP3};
33+
/**
34+
* Enumeration of the transition between groupA and groupB
35+
*/
36+
public enum TRANSITION {GROUP12, GROUP13, GROUP23};
37+
/**
38+
* Enumeration of the distribution for the first, first 25%, first 50%, first 75% and 100% of the grouping
39+
*/
40+
public enum DISTRIBUTION {FIRST, FIRST25, FIRST50, FIRST75, ALL};
41+
42+
/**
43+
* Returns the composition of the specific grouping for the given attribute.
44+
*
45+
* @param sequence
46+
* a protein sequence consisting of non-ambiguous characters only
47+
* @param attribute
48+
* one of the seven attributes (Hydrophobicity, Volume, Polarity, Polarizability, Charge, SecondaryStructure or SolventAccessibility)
49+
* @param group
50+
* the grouping to be computed
51+
* @return
52+
* returns the composition of the specific grouping for the given attribute
53+
* @throws Exception
54+
* throws Exception if attribute or group are unknown
55+
*/
56+
public double getComposition(ProteinSequence sequence, ATTRIBUTE attribute, GROUPING group) throws Exception;
57+
58+
public Map<GROUPING, Double> getComposition(ProteinSequence sequence, ATTRIBUTE attribute) throws Exception;
59+
60+
public Map<ATTRIBUTE, Map<GROUPING, Double>> getComposition(ProteinSequence sequence) throws Exception;
61+
62+
/**
63+
* Returns the number of transition between the specified groups for the given attribute with respect to the length of sequence.
64+
*
65+
* @param sequence
66+
* a protein sequence consisting of non-ambiguous characters only
67+
* @param attribute
68+
* one of the seven attributes (Hydrophobicity, Volume, Polarity, Polarizability, Charge, SecondaryStructure or SolventAccessibility)
69+
* @param transition
70+
* the interested transition between the groups
71+
* @return
72+
* returns the number of transition between the specified groups for the given attribute with respect to the length of sequence.
73+
* @throws Exception
74+
* throws Exception if attribute or group are unknown
75+
*/
76+
public double getTransition(ProteinSequence sequence, ATTRIBUTE attribute, TRANSITION transition) throws Exception;
77+
78+
public Map<TRANSITION, Double> getTransition(ProteinSequence sequence, ATTRIBUTE attribute) throws Exception;
79+
80+
public Map<ATTRIBUTE, Map<TRANSITION, Double>> getTransition(ProteinSequence sequence) throws Exception;
81+
82+
/**
83+
* Computes and return the position with respect to the sequence where the given distribution of the grouping can be found.<br/>
84+
* Example: "1111122222"<br/>
85+
* For the above example,<br/>
86+
* position of the GROUPING.GROUP1 && DISTRIBUTION.FIRST = 0/10 (because the first occurrence of '1' is at position 0)<br/>
87+
* position of the GROUPING.GROUP1 && DISTRIBUTION.ALL = 4/10 (because all occurrences of '1' happens on and before position 4)<br/>
88+
*
89+
* @param sequence
90+
* a protein sequence consisting of non-ambiguous characters only
91+
* @param attribute
92+
* one of the seven attributes (Hydrophobicity, Volume, Polarity, Polarizability, Charge, SecondaryStructure or SolventAccessibility)
93+
* @param group
94+
* one the three groups for the attribute
95+
* @param distribution
96+
* the distribution of the grouping
97+
*
98+
* @return
99+
* the position with respect to the length of sequence where the given distribution of the grouping can be found.<br/>
100+
* @throws Exception
101+
* throws Exception if attribute or group are unknown
102+
*/
103+
public double getPosition(ProteinSequence sequence, ATTRIBUTE attribute, GROUPING group, DISTRIBUTION distribution) throws Exception;
104+
105+
public Map<DISTRIBUTION, Double> getPosition(ProteinSequence sequence, ATTRIBUTE attribute, GROUPING group) throws Exception;
106+
107+
public Map<GROUPING, Map<DISTRIBUTION, Double>> getPosition(ProteinSequence sequence, ATTRIBUTE attribute) throws Exception;
108+
109+
public Map<ATTRIBUTE , Map<GROUPING, Map<DISTRIBUTION, Double>>> getPosition(ProteinSequence sequence) throws Exception;
110+
}

biojava3-aa-prop/src/main/java/org/biojava3/aaproperties/PeptidePropertiesImpl.java

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ public class PeptidePropertiesImpl implements IPeptideProperties{
1111

1212
@Override
1313
public double getMolecularWeight(ProteinSequence sequence) {
14+
final double hydrogenMW = 1.0079;
15+
final double hydroxideMW = 17.0073;
1416
double value = 0.0;
1517
AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
1618
for(char aa:sequence.toString().toCharArray()){
@@ -20,7 +22,9 @@ public double getMolecularWeight(ProteinSequence sequence) {
2022
}
2123
}
2224
//H 1.0079 OH 17.0073
23-
if(value > 0) value += 1.0079 + 17.0073;
25+
if(value > 0){
26+
value += hydrogenMW + hydroxideMW;
27+
}
2428
return value;
2529
}
2630

@@ -102,8 +106,8 @@ public double getApliphaticIndex(ProteinSequence sequence) {
102106
// Ala => A, Val => V, Ile => I, Leu => L
103107
AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
104108
Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence);
105-
double a = 2.9;
106-
double b = 3.9;
109+
final double a = 2.9;
110+
final double b = 3.9;
107111
double xAla = aa2Composition.get(aaSet.getCompoundForString("A"));
108112
double xVal = aa2Composition.get(aaSet.getCompoundForString("V"));
109113
double xIle = aa2Composition.get(aaSet.getCompoundForString("I"));
@@ -132,7 +136,7 @@ public double getIsoelectricPoint(ProteinSequence sequence) {
132136
double changeSize = 7.0;
133137
Map<AminoAcidCompound, Integer> chargedAA2Count = this.getChargedAACount(sequence);
134138
double margin = 1.0;
135-
double difference = 0.0000001;
139+
final double difference = 0.0000001;
136140
while(true){
137141
margin = this.getNetCharge(chargedAA2Count, currentPH);
138142
//Within allowed difference
@@ -157,16 +161,18 @@ private double getNetCharge(Map<AminoAcidCompound, Integer> chargedAA2Count, dou
157161
//Lys => K, Arg => R, His => H
158162
//Asp => D, Glu => E, Cys => C, Tyr => Y
159163
//(NH2-) 9.69 (-COOH) 2.34
164+
final double pkaOfNH2 = 9.69;
165+
final double pkaOfCOOH = 2.34;
160166
AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
161-
double nTerminalCharge = this.getPosCharge(9.69, ph);
167+
double nTerminalCharge = this.getPosCharge(pkaOfNH2, ph);
162168
double kCharge = chargedAA2Count.get(aaSet.getCompoundForString("K")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("K")), ph);
163169
double rCharge = chargedAA2Count.get(aaSet.getCompoundForString("R")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("R")), ph);
164170
double hCharge = chargedAA2Count.get(aaSet.getCompoundForString("H")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("H")), ph);
165171
double dCharge = chargedAA2Count.get(aaSet.getCompoundForString("D")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("D")), ph);
166172
double eCharge = chargedAA2Count.get(aaSet.getCompoundForString("E")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("E")), ph);
167173
double cCharge = chargedAA2Count.get(aaSet.getCompoundForString("C")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("C")), ph);
168174
double yCharge = chargedAA2Count.get(aaSet.getCompoundForString("Y")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("Y")), ph);
169-
double cTerminalCharge = this.getNegCharge(2.34, ph);
175+
double cTerminalCharge = this.getNegCharge(pkaOfCOOH, ph);
170176
if((kCharge + rCharge + hCharge) == 0.0 && (dCharge + eCharge + cCharge + yCharge) == 0.0){
171177
return 0.0;
172178
}
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
package org.biojava3.aaproperties;
2+
3+
import java.util.Map;
4+
5+
import org.biojava3.aaproperties.IProfeatProperties.ATTRIBUTE;
6+
import org.biojava3.aaproperties.IProfeatProperties.DISTRIBUTION;
7+
import org.biojava3.aaproperties.IProfeatProperties.GROUPING;
8+
import org.biojava3.aaproperties.IProfeatProperties.TRANSITION;
9+
import org.biojava3.core.sequence.ProteinSequence;
10+
11+
/**
12+
* This is an adaptor class which enable the ease of generating profeat properties.
13+
* At least one adaptor method is written for each available properties provided in IProfeatProperties.
14+
*
15+
* @author kohchuanhock
16+
* @version 2011.06.16
17+
* @see IProfeatProperties
18+
* @see ProfeatPropertiesImpl
19+
*/
20+
public class ProfeatProperties {
21+
/**
22+
* An adaptor method which returns the composition of the specific grouping for the given attribute.
23+
*
24+
* @param sequence
25+
* a protein sequence consisting of non-ambiguous characters only
26+
* @param attribute
27+
* one of the seven attributes (Hydrophobicity, Volume, Polarity, Polarizability, Charge, SecondaryStructure or SolventAccessibility)
28+
* @param group
29+
* the grouping to be computed
30+
* @return
31+
* returns the composition of the specific grouping for the given attribute
32+
* @throws Exception
33+
* throws Exception if attribute or group are unknown
34+
*/
35+
public static double getComposition(ProteinSequence sequence, ATTRIBUTE attribute, GROUPING group) throws Exception{
36+
return new ProfeatPropertiesImpl().getComposition(sequence, attribute, group);
37+
}
38+
39+
public static Map<GROUPING, Double> getComposition(ProteinSequence sequence, ATTRIBUTE attribute) throws Exception{
40+
return new ProfeatPropertiesImpl().getComposition(sequence, attribute);
41+
}
42+
43+
public static Map<ATTRIBUTE, Map<GROUPING, Double>> getComposition(ProteinSequence sequence) throws Exception{
44+
return new ProfeatPropertiesImpl().getComposition(sequence);
45+
}
46+
47+
/**
48+
* An adaptor method which returns the number of transition between the specified groups for the given attribute with respect to the length of sequence.
49+
*
50+
* @param sequence
51+
* a protein sequence consisting of non-ambiguous characters only
52+
* @param attribute
53+
* one of the seven attributes (Hydrophobicity, Volume, Polarity, Polarizability, Charge, SecondaryStructure or SolventAccessibility)
54+
* @param transition
55+
* the interested transition between the groups
56+
* @return
57+
* returns the number of transition between the specified groups for the given attribute with respect to the length of sequence.
58+
* @throws Exception
59+
* throws Exception if attribute or group are unknown
60+
*/
61+
public static double getTransition(ProteinSequence sequence, ATTRIBUTE attribute, TRANSITION transition) throws Exception{
62+
return new ProfeatPropertiesImpl().getTransition(sequence, attribute, transition);
63+
}
64+
65+
public static Map<TRANSITION, Double> getTransition(ProteinSequence sequence, ATTRIBUTE attribute) throws Exception{
66+
return new ProfeatPropertiesImpl().getTransition(sequence, attribute);
67+
}
68+
69+
public static Map<ATTRIBUTE, Map<TRANSITION, Double>> getTransition(ProteinSequence sequence) throws Exception{
70+
return new ProfeatPropertiesImpl().getTransition(sequence);
71+
}
72+
73+
/**
74+
* An adaptor method which computes and return the position with respect to the sequence where the given distribution of the grouping can be found.<br/>
75+
* Example: "1111122222"<br/>
76+
* For the above example,<br/>
77+
* position of the GROUPING.GROUP1 && DISTRIBUTION.FIRST = 0/10 (because the first occurrence of '1' is at position 0)<br/>
78+
* position of the GROUPING.GROUP1 && DISTRIBUTION.ALL = 4/10 (because all occurrences of '1' happens on and before position 4)<br/>
79+
*
80+
* @param sequence
81+
* a protein sequence consisting of non-ambiguous characters only
82+
* @param attribute
83+
* one of the seven attributes (Hydrophobicity, Volume, Polarity, Polarizability, Charge, SecondaryStructure or SolventAccessibility)
84+
* @param group
85+
* one the three groups for the attribute
86+
* @param distribution
87+
* the distribution of the grouping
88+
*
89+
* @return
90+
* the position with respect to the length of sequence where the given distribution of the grouping can be found.<br/>
91+
* @throws Exception
92+
* throws Exception if attribute or group are unknown
93+
*/
94+
public static double getPosition(ProteinSequence sequence, ATTRIBUTE attribute, GROUPING group, DISTRIBUTION distribution) throws Exception{
95+
return new ProfeatPropertiesImpl().getPosition(sequence, attribute, group, distribution);
96+
}
97+
98+
public static Map<DISTRIBUTION, Double> getPosition(ProteinSequence sequence, ATTRIBUTE attribute, GROUPING group) throws Exception{
99+
return new ProfeatPropertiesImpl().getPosition(sequence, attribute, group);
100+
}
101+
102+
public static Map<GROUPING, Map<DISTRIBUTION, Double>> getPosition(ProteinSequence sequence, ATTRIBUTE attribute) throws Exception{
103+
return new ProfeatPropertiesImpl().getPosition(sequence, attribute);
104+
}
105+
106+
public static Map<ATTRIBUTE , Map<GROUPING, Map<DISTRIBUTION, Double>>> getPosition(ProteinSequence sequence) throws Exception{
107+
return new ProfeatPropertiesImpl().getPosition(sequence);
108+
}
109+
}

0 commit comments

Comments
 (0)