|
| 1 | +Smith-Waterman - Local Alignment |
| 2 | +================================ |
| 3 | + |
| 4 | +BioJava contains implementations of various protein sequence and 3D structure alignment algorithms. Here is how to run a local (Smith-Waterman) alignment of two protein sequences: |
| 5 | + |
| 6 | + |
| 7 | + |
| 8 | +```java |
| 9 | +public static void main(String[] args) throws Exception { |
| 10 | + |
| 11 | + String uniprotID1 = "P69905"; |
| 12 | + String uniprotID2 = "P68871"; |
| 13 | + |
| 14 | + ProteinSequence s1 = getSequenceForId(uniprotID1); |
| 15 | + ProteinSequence s2 = getSequenceForId(uniprotID2); |
| 16 | + |
| 17 | + SubstitutionMatrix<AminoAcidCompound> matrix = SubstitutionMatrixHelper.getBlosum65(); |
| 18 | + |
| 19 | + GapPenalty penalty = new SimpleGapPenalty(); |
| 20 | + |
| 21 | + int gop = 8; |
| 22 | + int extend = 1; |
| 23 | + penalty.setOpenPenalty(gop); |
| 24 | + penalty.setExtensionPenalty(extend); |
| 25 | + |
| 26 | + |
| 27 | + PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> smithWaterman = |
| 28 | + Alignments.getPairwiseAligner(s1, s2, PairwiseSequenceAlignerType.LOCAL, penalty, matrix); |
| 29 | + |
| 30 | + SequencePair<ProteinSequence, AminoAcidCompound> pair = smithWaterman.getPair(); |
| 31 | + |
| 32 | + |
| 33 | + System.out.println(pair.toString(60)); |
| 34 | + |
| 35 | + |
| 36 | + } |
| 37 | + |
| 38 | + private static ProteinSequence getSequenceForId(String uniProtId) throws Exception { |
| 39 | + URL uniprotFasta = new URL(String.format("http://www.uniprot.org/uniprot/%s.fasta", uniProtId)); |
| 40 | + ProteinSequence seq = FastaReaderHelper.readFastaProteinSequence(uniprotFasta.openStream()).get(uniProtId); |
| 41 | + System.out.printf("id : %s %s%s%s", uniProtId, seq, System.getProperty("line.separator"), seq.getOriginalHeader()); |
| 42 | + System.out.println(); |
| 43 | + |
| 44 | + return seq; |
| 45 | + } |
| 46 | +``` |
0 commit comments