Skip to content

Commit 1163074

Browse files
committed
Update UniprotProxySequenceReader.java
Two fixes: (1) Full UniProt downloads contain whitespace in some sequences, which causes a crash in setContents because a space is not a valid compound; I added a replaceAll call to strip out the whitespace. (2) A shortName element can apparently be repeated. In both places where a shortName element can occur, we now fetch the full list of elements (possibly of length 0 or 1) and iterate over every shortName instead of taking only the first one.
1 parent d17f8ef commit 1163074

1 file changed

Lines changed: 15 additions & 11 deletions

File tree

biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/UniprotProxySequenceReader.java

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ public UniprotProxySequenceReader(String accession, CompoundSet<C> compoundSet)
114114
public UniprotProxySequenceReader(Document document, CompoundSet<C> compoundSet) throws CompoundNotFoundException {
115115
setCompoundSet(compoundSet);
116116
uniprotDoc = document;
117-
String seq = this.getSequence(uniprotDoc);
117+
String seq = this.getSequence(uniprotDoc).replaceAll("\\s","");
118118
setContents(seq);
119119
}
120120
/**
@@ -385,23 +385,27 @@ public ArrayList<String> getProteinAliases() throws XPathExpressionException {
385385
for (Element element : keyWordElementList) {
386386
Element fullNameElement = XMLHelper.selectSingleElement(element, "fullName");
387387
aliasList.add(fullNameElement.getTextContent());
388-
Element shortNameElement = XMLHelper.selectSingleElement(element, "shortName");
389-
if(null != shortNameElement) {
390-
String shortName = shortNameElement.getTextContent();
391-
if(null != shortName && !shortName.trim().isEmpty()) {
392-
aliasList.add(shortName);
388+
ArrayList<Element> shortNameElements = XMLHelper.selectElements(element, "shortName");
389+
for(Element shortNameElement : shortNameElements) {
390+
if(null != shortNameElement) {
391+
String shortName = shortNameElement.getTextContent();
392+
if(null != shortName && !shortName.trim().isEmpty()) {
393+
aliasList.add(shortName);
394+
}
393395
}
394396
}
395397
}
396398
keyWordElementList = XMLHelper.selectElements(proteinElement, "recommendedName");
397399
for (Element element : keyWordElementList) {
398400
Element fullNameElement = XMLHelper.selectSingleElement(element, "fullName");
399401
aliasList.add(fullNameElement.getTextContent());
400-
Element shortNameElement = XMLHelper.selectSingleElement(element, "shortName");
401-
if(null != shortNameElement) {
402-
String shortName = shortNameElement.getTextContent();
403-
if(null != shortName && !shortName.trim().isEmpty()) {
404-
aliasList.add(shortName);
402+
ArrayList<Element> shortNameElements = XMLHelper.selectElements(element, "shortName");
403+
for(Element shortNameElement : shortNameElements) {
404+
if(null != shortNameElement) {
405+
String shortName = shortNameElement.getTextContent();
406+
if(null != shortName && !shortName.trim().isEmpty()) {
407+
aliasList.add(shortName);
408+
}
405409
}
406410
}
407411
}

0 commit comments

Comments
 (0)