|
55 | 55 | import org.biojava3.core.sequence.template.SequenceMixin; |
56 | 56 | import org.biojava3.core.sequence.template.SequenceView; |
57 | 57 | import org.biojava3.core.util.XMLHelper; |
58 | | - |
59 | 58 | import org.slf4j.Logger; |
60 | 59 | import org.slf4j.LoggerFactory; |
61 | | - |
62 | 60 | import org.w3c.dom.Document; |
63 | 61 | import org.w3c.dom.Element; |
64 | 62 |
|
@@ -98,6 +96,37 @@ public UniprotProxySequenceReader(String accession, CompoundSet<C> compoundSet) |
98 | 96 | setContents(seq); |
99 | 97 | } |
100 | 98 |
|
/**
 * Builds a reader from a UniProt XML document that has already been parsed into a DOM tree.
 * The compound set is assigned first, the document is kept on this instance, and the
 * sequence string obtained from the document via {@code getSequence} becomes the
 * contents of this reader.
 *
 * @param document the DOM representation of the UniProt XML
 * @param compoundSet the compound set assigned to this reader
 * @throws Exception propagated from extracting the sequence or from setting the contents
 */
public UniprotProxySequenceReader(Document document, CompoundSet<C> compoundSet) throws Exception {
    setCompoundSet(compoundSet);
    this.uniprotDoc = document;
    // The stored field and the parameter are the same reference at this point.
    setContents(this.getSequence(document));
}
| 112 | + /** |
| 113 | + * The passed in xml is parsed as a DOM object so we know everything about the protein. |
| 114 | + * If an error occurs throw an exception. We could have a bad uniprot id |
| 115 | + * @param xml |
| 116 | + * @param compoundSet |
| 117 | + * @return UniprotProxySequenceReader |
| 118 | + * @throws Exception |
| 119 | + */ |
| 120 | + public static <C extends Compound> UniprotProxySequenceReader<C> parseUniprotXMLString(String xml, CompoundSet<C> compoundSet) { |
| 121 | + try { |
| 122 | + Document document = XMLHelper.inputStreamToDocument(new ByteArrayInputStream(xml.getBytes())); |
| 123 | + return new UniprotProxySequenceReader<C>(document, compoundSet); |
| 124 | + } catch (Exception e) { |
| 125 | + logger.error("Exception on xml parse of: {}", xml); |
| 126 | + } |
| 127 | + return null; |
| 128 | + } |
| 129 | + |
101 | 130 | public void setCompoundSet(CompoundSet<C> compoundSet) { |
102 | 131 | this.compoundSet = compoundSet; |
103 | 132 | } |
@@ -254,6 +283,27 @@ public AccessionID getAccession() { |
254 | 283 | return accessionID; |
255 | 284 | } |
256 | 285 |
|
| 286 | + /** |
| 287 | + * Pull uniprot accessions associated with this sequence |
| 288 | + * @return |
| 289 | + * @throws Exception |
| 290 | + */ |
| 291 | + public ArrayList<AccessionID> getAccessions() throws Exception { |
| 292 | + ArrayList<AccessionID> accessionList = new ArrayList<AccessionID>(); |
| 293 | + if (uniprotDoc == null) { |
| 294 | + return accessionList; |
| 295 | + } |
| 296 | + Element uniprotElement = uniprotDoc.getDocumentElement(); |
| 297 | + Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry"); |
| 298 | + ArrayList<Element> keyWordElementList = XMLHelper.selectElements(entryElement, "accession"); |
| 299 | + for (Element element : keyWordElementList) { |
| 300 | + AccessionID accessionID = new AccessionID(element.getTextContent(), DataSource.UNIPROT); |
| 301 | + accessionList.add(accessionID); |
| 302 | + } |
| 303 | + |
| 304 | + return accessionList; |
| 305 | + } |
| 306 | + |
257 | 307 | /** |
258 | 308 | * |
259 | 309 | * @param compounds |
|
0 commit comments